summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.ahub/sam/exclude.txt16
-rw-r--r--.ahub/tcchecker-tca/config.yaml23
-rw-r--r--.github/workflows/check-pr-commit.yml7
-rw-r--r--compiler/arser/include/arser/arser.h87
-rw-r--r--compiler/circle-eval-diff/CMakeLists.txt10
-rw-r--r--compiler/circle-eval-diff/driver/Driver.cpp115
-rw-r--r--compiler/circle-eval-diff/include/CircleEvalDiff.h30
-rw-r--r--compiler/circle-eval-diff/src/CircleEvalDiff.cpp217
-rw-r--r--compiler/circle-eval-diff/src/InputDataLoader.cpp235
-rw-r--r--compiler/circle-eval-diff/src/InputDataLoader.h98
-rw-r--r--compiler/circle-eval-diff/src/InputDataLoader.test.cpp65
-rw-r--r--compiler/circle-eval-diff/src/MetricPrinter.cpp479
-rw-r--r--compiler/circle-eval-diff/src/MetricPrinter.h127
-rw-r--r--compiler/circle-eval-diff/src/MetricPrinter.test.cpp312
-rw-r--r--compiler/circle-eval-diff/src/ModuleEvalDiff.cpp216
-rw-r--r--compiler/circle-eval-diff/src/ModuleEvalDiff.h67
-rw-r--r--compiler/circle-eval-diff/src/Tensor.cpp52
-rw-r--r--compiler/circle-eval-diff/src/Tensor.h3
-rw-r--r--compiler/circle-eval-diff/src/Tensor.test.cpp28
-rw-r--r--compiler/circle-execution-plan/CMakeLists.txt9
-rw-r--r--compiler/circle-execution-plan/src/CircleExecutionPlan.cpp29
-rw-r--r--compiler/circle-execution-plan/src/ExecutionPlanner.cpp70
-rw-r--r--compiler/circle-execution-plan/src/ExecutionPlanner.h2
-rw-r--r--compiler/circle-inspect/driver/Driver.cpp2
-rw-r--r--compiler/circle-inspect/requires.cmake1
-rw-r--r--compiler/circle-inspect/src/Dump.cpp20
-rw-r--r--compiler/circle-inspect/src/Reader.cpp127
-rw-r--r--compiler/circle-inspect/src/Reader.h87
-rw-r--r--compiler/circle-interpreter/CMakeLists.txt13
-rw-r--r--compiler/circle-interpreter/requires.cmake6
-rw-r--r--compiler/circle-interpreter/src/CircleInterpreter.cpp145
-rw-r--r--compiler/circle-operator-test/CMakeLists.txt18
-rw-r--r--compiler/circle-operator-test/README.md7
-rw-r--r--compiler/circle-operator-test/requires.cmake2
-rw-r--r--compiler/circle-operator-test/src/circle-operator.test.cpp248
-rw-r--r--compiler/circle-operator/CMakeLists.txt17
-rw-r--r--compiler/circle-operator/README.md70
-rw-r--r--compiler/circle-operator/driver/Driver.cpp112
-rw-r--r--compiler/circle-operator/requires.cmake4
-rw-r--r--compiler/circle-operator/src/Dump.cpp85
-rw-r--r--compiler/circle-operator/src/Dump.h45
-rw-r--r--compiler/circle-opselector/driver/Driver.cpp20
-rw-r--r--compiler/circle-part-value-test/CMakeLists.txt3
-rw-r--r--compiler/circle-partitioner-test/CMakeLists.txt3
-rw-r--r--compiler/circle-partitioner/CMakeLists.txt20
-rw-r--r--compiler/circle-partitioner/README.md23
-rw-r--r--compiler/circle-partitioner/requires.cmake1
-rw-r--r--compiler/circle-partitioner/src/CirclePartitioner.cpp83
-rw-r--r--compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt17
-rw-r--r--compiler/circle-quantizer-dredd-recipe-test/test.lst65
-rw-r--r--compiler/circle-quantizer/CMakeLists.txt1
-rw-r--r--compiler/circle-quantizer/requires.cmake1
-rw-r--r--compiler/circle-quantizer/src/CircleQuantizer.cpp96
-rw-r--r--compiler/circle-tensordump/driver/Driver.cpp4
-rw-r--r--compiler/circle-tensordump/src/Dump.cpp7
-rw-r--r--compiler/circle-tensordump/src/Reader.cpp117
-rw-r--r--compiler/circle-tensordump/src/Reader.h85
-rw-r--r--compiler/circle-verify/src/Driver.cpp2
-rw-r--r--compiler/circle2circle-dredd-recipe-test/test.lst3
-rw-r--r--compiler/circle2circle/CMakeLists.txt2
-rw-r--r--compiler/circle2circle/requires.cmake1
-rw-r--r--compiler/circle2circle/src/Circle2Circle.cpp504
-rw-r--r--compiler/circlechef/tools/file/Driver.cpp6
-rw-r--r--compiler/circlechef/tools/reverse/Driver.cpp6
-rw-r--r--compiler/circledump/CMakeLists.txt1
-rw-r--r--compiler/circledump/driver/Driver.cpp16
-rw-r--r--compiler/circledump/include/circleread/Model.h43
-rw-r--r--compiler/circledump/requires.cmake1
-rw-r--r--compiler/circledump/src/Dump.cpp14
-rw-r--r--compiler/circledump/src/Load.cpp133
-rw-r--r--compiler/circledump/src/OpPrinter.cpp6
-rw-r--r--compiler/circledump/src/Read.cpp119
-rw-r--r--compiler/circledump/src/Read.h106
-rw-r--r--compiler/cli/CMakeLists.txt2
-rw-r--r--compiler/coco/core/src/IR/Module.cpp2
-rw-r--r--compiler/coco/generic/src/IR/Data.cpp3
-rw-r--r--compiler/common-artifacts/CMakeLists.txt49
-rw-r--r--compiler/common-artifacts/exclude.lst3
-rw-r--r--compiler/common-artifacts/src/TestDataGenerator.cpp12
-rw-r--r--compiler/crew/CMakeLists.txt3
-rw-r--r--compiler/crew/src/PConfigIni.cpp71
-rw-r--r--compiler/crew/src/PConfigIni.test.cpp61
-rw-r--r--compiler/crew/src/test_read_semicolon.ini2
-rw-r--r--compiler/enco/core/src/CppGen/Host.cpp2
-rw-r--r--compiler/enco/core/src/CppGen/Subnet.cpp4
-rw-r--r--compiler/enco/core/src/Transforms/Split.cpp28
-rw-r--r--compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp2
-rw-r--r--compiler/kuma/src/IntervalSet.h1
-rw-r--r--compiler/loco/include/loco/IR/DataTypeTraits.h9
-rw-r--r--compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp12
-rw-r--r--compiler/luci-eval-driver/src/EvalDriver.cpp17
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst2
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h2
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h2
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALreference_ops.h1568
-rw-r--r--compiler/luci-interpreter/pal/linux/KernelsToBuild.lst3
-rw-r--r--compiler/luci-interpreter/pal/linux/PALreference_ops.h22
-rw-r--r--compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst2
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALDequantize.h2
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALQuantize.h2
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALreference_ops.h1556
-rw-r--r--compiler/luci-interpreter/src/core/KernelParams.h5
-rw-r--r--compiler/luci-interpreter/src/kernels/Fill.cpp117
-rw-r--r--compiler/luci-interpreter/src/kernels/Fill.h47
-rw-r--r--compiler/luci-interpreter/src/kernels/Fill.test.cpp169
-rw-r--r--compiler/luci-interpreter/src/kernels/MirrorPad.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/Pack.cpp5
-rw-r--r--compiler/luci-interpreter/src/kernels/Pack.test.cpp20
-rw-r--r--compiler/luci-interpreter/src/kernels/Pad.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/PadV2.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/ReduceMax.cpp181
-rw-r--r--compiler/luci-interpreter/src/kernels/ReduceMax.h50
-rw-r--r--compiler/luci-interpreter/src/kernels/ReduceMax.test.cpp103
-rw-r--r--compiler/luci-interpreter/src/kernels/Shape.cpp70
-rw-r--r--compiler/luci-interpreter/src/kernels/Shape.h46
-rw-r--r--compiler/luci-interpreter/src/kernels/Shape.test.cpp89
-rw-r--r--compiler/luci-interpreter/src/kernels/SplitV.cpp28
-rw-r--r--compiler/luci-interpreter/src/kernels/StridedSlice.cpp5
-rw-r--r--compiler/luci-interpreter/src/loader/GraphLoader.cpp2
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Add.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Cast.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Div.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Elu.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Equal.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Exp.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Fill.cpp37
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Floor.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Gather.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Greater.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/If.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Less.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Logistic.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Maximum.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Mean.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Minimum.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Mul.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Neg.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/PRelu.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Pack.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Pad.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/PadV2.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Pow.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Quantize.cpp5
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/ReduceMax.cpp55
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Relu.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Relu6.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Reshape.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/SVDF.cpp5
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Shape.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Slice.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Softmax.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Split.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/SplitV.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Square.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Sub.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Tanh.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Transpose.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Unpack.cpp4
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/While.cpp4
-rw-r--r--compiler/luci-micro/CMakeLists.txt2
-rw-r--r--compiler/luci-micro/luci-interpreter/CMakeLists.txt15
-rw-r--r--compiler/luci-micro/luci-interpreter/README.md158
-rw-r--r--compiler/luci-micro/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h144
-rw-r--r--compiler/luci-micro/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h35
-rw-r--r--compiler/luci-micro/luci-interpreter/include/luci_interpreter/Interpreter.h84
-rw-r--r--compiler/luci-micro/luci-interpreter/include/luci_interpreter/MemoryManager.h37
-rw-r--r--compiler/luci-micro/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h34
-rw-r--r--compiler/luci-micro/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h45
-rw-r--r--compiler/luci-micro/luci-interpreter/include/luci_interpreter/TestMemoryManager.h47
-rw-r--r--compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/DataType.h36
-rw-r--r--compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h186
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst62
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALArgMax.h33
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h124
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h37
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h199
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h35
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h192
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALElu.h33
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h114
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Normalize.h34
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h33
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h32
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALMul.h45
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALNeg.h32
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h37
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h37
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h190
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSoftmax.h78
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h38
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h35
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSub.h35
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/cmsisnn/pal.cmake65
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/KernelsToBuild.lst77
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALArgMax.h33
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALAveragePool2d.h73
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALBatchMatMul.h67
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALBatchToSpaceND.h37
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALConv2d.h127
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALDepthToSpace.h35
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALDepthwiseConv2d.h91
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALDequantize.h34
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALElu.h31
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALFullyConnected.h61
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALGather.h35
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALL2Normalize.h34
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALL2Pool2D.h33
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALLeakyRelu.h32
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALLocalResponseNormalization.h34
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALLogSoftmax.h47
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALMul.h55
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALNeg.h32
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALQuantize.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALRelu.h39
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALRelu6.h39
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALResizeBilinear.h37
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h37
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALSVDF.h90
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALSlice.h33
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALSoftmax.h47
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToBatchND.h38
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToDepth.h35
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALSplit.h33
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/PALSub.h35
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/linux/pal.cmake82
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst62
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALArgMax.h33
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALAveragePool2d.h73
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h37
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALConv2d.h85
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthToSpace.h35
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h91
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALDequantize.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALElu.h33
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALFullyConnected.h61
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Normalize.h34
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h33
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALLeakyRelu.h32
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALMul.h45
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALNeg.h32
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALQuantize.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeBilinear.h37
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h37
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALSVDF.h258
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALSoftmax.h62
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToBatchND.h38
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToDepth.h35
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/PALSub.h35
-rw-r--r--compiler/luci-micro/luci-interpreter/pal/mcu/pal.cmake56
-rw-r--r--compiler/luci-micro/luci-interpreter/requires.cmake1
-rw-r--r--compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.cpp96
-rw-r--r--compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.test.cpp69
-rw-r--r--compiler/luci-micro/luci-interpreter/src/CMakeLists.txt61
-rw-r--r--compiler/luci-micro/luci-interpreter/src/Interpreter.cpp145
-rw-r--r--compiler/luci-micro/luci-interpreter/src/SimpleMemoryManager.cpp51
-rw-r--r--compiler/luci-micro/luci-interpreter/src/StaticMemoryManager.cpp39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/TestMemoryManager.cpp45
-rw-r--r--compiler/luci-micro/luci-interpreter/src/core/CMakeLists.txt19
-rw-r--r--compiler/luci-micro/luci-interpreter/src/core/EventNotifier.h36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/core/Kernel.h75
-rw-r--r--compiler/luci-micro/luci-interpreter/src/core/KernelParams.h228
-rw-r--r--compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.cpp201
-rw-r--r--compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.h71
-rw-r--r--compiler/luci-micro/luci-interpreter/src/core/RuntimeModule.h60
-rw-r--r--compiler/luci-micro/luci-interpreter/src/core/Tensor.cpp58
-rw-r--r--compiler/luci-micro/luci-interpreter/src/import/CMakeLists.txt15
-rw-r--r--compiler/luci-micro/luci-interpreter/src/import/GraphBuilderRegistry.cpp33
-rw-r--r--compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp113
-rw-r--r--compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.h39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Add.cpp220
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Add.h50
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Add.test.cpp357
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.cpp139
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.test.cpp122
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.cpp194
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.h54
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp283
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.cpp188
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.h49
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp272
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp104
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.h45
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp100
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/BinaryOpCommon.h73
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/CMakeLists.txt43
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Cast.cpp143
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Cast.h43
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Cast.test.cpp241
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.cpp149
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.h48
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.test.cpp268
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.cpp456
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.h59
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.test.cpp707
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.cpp80
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.h45
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp115
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp451
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h57
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp622
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.cpp79
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.h43
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.test.cpp149
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Div.cpp152
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Div.h49
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Div.test.cpp230
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Elu.cpp52
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Elu.h43
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Elu.test.cpp81
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Equal.cpp142
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Equal.h54
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Equal.test.cpp306
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Exp.cpp56
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Exp.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Exp.test.cpp55
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.cpp88
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp115
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Fill.cpp117
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Fill.h47
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Fill.test.cpp169
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Floor.cpp57
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Floor.h45
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Floor.test.cpp76
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.cpp85
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp147
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.cpp192
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.h51
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp260
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Gather.cpp139
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Gather.h47
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Gather.test.cpp137
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Greater.cpp142
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Greater.h54
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Greater.test.cpp334
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.cpp145
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.h54
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp333
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/If.cpp94
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/If.h49
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/If.test.cpp161
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.cpp121
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.h49
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp97
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.cpp75
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp126
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.cpp88
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.h49
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp291
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.cpp90
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.h53
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp127
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Less.cpp142
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Less.h54
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Less.test.cpp334
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.cpp142
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.h54
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.test.cpp334
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp65
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp157
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.cpp92
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.h48
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp124
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.cpp62
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.h47
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp101
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.cpp60
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp78
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.cpp49
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp104
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Logistic.cpp94
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Logistic.h52
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Logistic.test.cpp148
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.cpp150
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.h52
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp139
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Maximum.cpp65
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Maximum.h47
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Maximum.test.cpp82
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Mean.cpp346
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Mean.h55
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Mean.test.cpp240
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Minimum.cpp65
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Minimum.h47
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Minimum.test.cpp82
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.cpp172
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.h45
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp225
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Mul.cpp150
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Mul.h52
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Mul.test.cpp292
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Neg.cpp58
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Neg.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Neg.test.cpp71
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.cpp142
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.h54
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.test.cpp306
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/OneHot.cpp136
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/OneHot.h48
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/OneHot.test.cpp192
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/PRelu.cpp211
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/PRelu.h59
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/PRelu.test.cpp397
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Pack.cpp142
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Pack.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Pack.test.cpp163
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Pad.cpp114
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Pad.h43
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Pad.test.cpp109
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/PadV2.cpp108
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/PadV2.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/PadV2.test.cpp90
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Pow.cpp79
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Pow.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Pow.test.cpp140
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Quantize.cpp160
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Quantize.h43
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Quantize.test.cpp254
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Relu.cpp114
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Relu.h51
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Relu.test.cpp168
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Relu6.cpp88
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Relu6.h50
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Relu6.test.cpp149
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Reshape.cpp90
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Reshape.h43
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Reshape.test.cpp82
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp74
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.h45
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp255
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp74
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h45
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp231
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.cpp81
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.h43
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp71
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.cpp66
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp90
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SVDF.cpp241
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SVDF.h56
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SVDF.test.cpp341
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Shape.cpp70
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Shape.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Shape.test.cpp89
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Slice.cpp153
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Slice.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Slice.test.cpp70
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Softmax.cpp92
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Softmax.h49
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Softmax.test.cpp117
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp103
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.h45
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp123
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp79
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.h45
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp65
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Split.cpp81
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Split.h47
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Split.test.cpp129
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SplitV.cpp111
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SplitV.h49
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SplitV.test.cpp112
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.cpp66
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.test.cpp90
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Square.cpp66
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Square.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Square.test.cpp52
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.cpp64
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.h47
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp78
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.cpp86
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.test.cpp74
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.cpp150
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.h47
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp112
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Sub.cpp164
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Sub.h49
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Sub.test.cpp266
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Tanh.cpp93
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Tanh.h52
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Tanh.test.cpp164
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.cpp128
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.h296
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Transpose.cpp84
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Transpose.h44
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Transpose.test.cpp115
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.cpp351
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.h65
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp353
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Unpack.cpp84
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Unpack.h46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Unpack.test.cpp148
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Utils.cpp198
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/Utils.h293
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/While.cpp116
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/While.h48
-rw-r--r--compiler/luci-micro/luci-interpreter/src/kernels/While.test.cpp101
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/CMakeLists.txt39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.cpp344
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.h55
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.cpp104
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.h52
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.test.cpp1376
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.cpp64
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.h84
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.cpp53
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.h52
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/RuntimeToIR.h38
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Add.cpp40
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp64
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp70
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp38
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Builders.h37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Cast.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp42
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp66
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp67
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp35
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Div.cpp39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Elu.cpp35
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Equal.cpp38
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Exp.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Fill.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Floor.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp42
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Gather.cpp42
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Greater.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/If.cpp47
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp43
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp44
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp38
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Less.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp42
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Logistic.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp44
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Maximum.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Mean.cpp61
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Minimum.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp40
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Mul.cpp40
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Neg.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/OneHot.cpp42
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/PRelu.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Pack.cpp44
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Pad.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/PadV2.cpp38
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Pow.cpp38
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Quantize.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu6.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Reshape.cpp38
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp41
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp46
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/SVDF.cpp92
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Shape.cpp39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Slice.cpp39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Softmax.cpp39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Split.cpp40
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/SplitV.cpp41
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Square.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp39
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp47
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Sub.cpp40
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Tanh.cpp36
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Transpose.cpp37
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp55
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/Unpack.cpp42
-rw-r--r--compiler/luci-micro/luci-interpreter/src/loader/nodes/While.cpp47
-rw-r--r--compiler/luci-micro/standalone/CMakeLists.txt19
-rw-r--r--compiler/luci-pass-value-test/CMakeLists.txt7
-rw-r--r--compiler/luci-pass-value-test/test.lst9
-rw-r--r--compiler/luci-value-test/test.lst2
-rw-r--r--compiler/luci/export/src/CircleBuiltinTypesExtractor.h4
-rw-r--r--compiler/luci/export/src/CircleOps.lst1
-rw-r--r--compiler/luci/export/src/CircleTensorExporter.cpp6
-rw-r--r--compiler/luci/import/CMakeLists.txt1
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes.h1
-rw-r--r--compiler/luci/import/include/luci/Import/Nodes/CircleDensify.h37
-rw-r--r--compiler/luci/import/include/luci/ImporterEx.h39
-rw-r--r--compiler/luci/import/src/GraphBuilderRegistry.cpp2
-rw-r--r--compiler/luci/import/src/ImporterEx.cpp61
-rw-r--r--compiler/luci/import/src/Nodes/CircleConst.cpp4
-rw-r--r--compiler/luci/import/src/Nodes/CircleDensify.cpp43
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleNodes.h1
-rw-r--r--compiler/luci/lang/include/luci/IR/CircleNodes.lst1
-rw-r--r--compiler/luci/lang/include/luci/IR/Nodes/CircleDensify.h40
-rw-r--r--compiler/luci/lang/src/Nodes/CircleConst.cpp1
-rw-r--r--compiler/luci/lang/src/Nodes/CircleDensify.test.cpp76
-rw-r--r--compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp1
-rw-r--r--compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp16
-rw-r--r--compiler/luci/logex/src/CircleNodeSummaryBuilders.h5
-rw-r--r--compiler/luci/partition/include/luci/ConnectNode.h219
-rw-r--r--compiler/luci/partition/src/ConnectNode.cpp2
-rw-r--r--compiler/luci/partition/src/ConnectNode.h218
-rw-r--r--compiler/luci/partition/src/ConnectNode.test.h2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAbs.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAbs.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAdd.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAdd.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAddN.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAddN.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleArgMax.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleArgMax.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleArgMin.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleArgMin.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAveragePool2D.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleAveragePool2D.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBCQGather.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBCQGather.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBatchMatMul.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBatchMatMul.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCast.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCast.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCeil.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCeil.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleConcatenation.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleConcatenation.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleConst.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleConv2D.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleConv2D.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCos.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCos.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCustom.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCustom.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCustomOut.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleCustomOut.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDensify.cpp38
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDensify.test.cpp90
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDepthToSpace.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDepthToSpace.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDequantize.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDequantize.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDiv.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleDiv.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleElu.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleElu.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleEqual.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleEqual.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleExp.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleExp.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleExpandDims.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleExpandDims.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFakeQuant.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFakeQuant.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFill.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFill.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFloor.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFloor.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFloorDiv.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFloorDiv.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFloorMod.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFloorMod.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFullyConnected.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleFullyConnected.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGather.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGather.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGatherNd.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGatherNd.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGreater.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGreater.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGreaterEqual.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleGreaterEqual.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleIf.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleIf.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleIfOut.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleIfOut.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleInstanceNorm.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleInstanceNorm.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleL2Normalize.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleL2Normalize.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleL2Pool2D.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleL2Pool2D.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLeakyRelu.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLeakyRelu.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLess.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLess.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLessEqual.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLessEqual.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLog.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLog.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogSoftmax.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogSoftmax.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogicalAnd.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogicalAnd.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogicalNot.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogicalNot.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogicalOr.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogicalOr.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogistic.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleLogistic.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMatrixDiag.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMatrixDiag.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMaxPool2D.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMaxPool2D.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMaximum.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMaximum.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMean.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMean.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMinimum.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMinimum.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMirrorPad.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMirrorPad.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMul.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleMul.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNeg.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNeg.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNotEqual.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleNotEqual.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleOneHot.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleOneHot.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleOutputDummy.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleOutputExclude.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePRelu.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePRelu.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePack.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePack.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePad.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePad.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePadV2.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePadV2.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePow.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CirclePow.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleQuantize.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleQuantize.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRange.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRange.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRank.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRank.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceAny.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceAny.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceMax.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceMax.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceMin.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceMin.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceProd.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReduceProd.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRelu.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRelu.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRelu6.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRelu6.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReluN1To1.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReluN1To1.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReshape.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReshape.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleResizeBilinear.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleResizeBilinear.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReverseSequence.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReverseSequence.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReverseV2.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleReverseV2.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRound.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRound.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRsqrt.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleRsqrt.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSVDF.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleScatterNd.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleScatterNd.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSegmentSum.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSegmentSum.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSelect.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSelect.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSelectV2.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSelectV2.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleShape.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleShape.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSin.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSin.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSlice.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSlice.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSoftmax.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSoftmax.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSpaceToDepth.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSpaceToDepth.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSparseToDense.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSparseToDense.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplit.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplit.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplitOut.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplitOut.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplitV.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplitV.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplitVOut.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSplitVOut.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSqrt.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSqrt.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSquare.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSquare.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSquaredDifference.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSquaredDifference.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSqueeze.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSqueeze.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleStridedSlice.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleStridedSlice.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSub.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSub.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSum.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleSum.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTanh.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTanh.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTile.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTile.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTopKV2.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTopKV2.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTopKV2Out.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTopKV2Out.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTranspose.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTranspose.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTransposeConv.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleTransposeConv.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnique.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnique.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUniqueOut.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUniqueOut.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnpack.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnpack.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnpackOut.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleUnpackOut.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleVariable.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleWhere.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleWhere.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleWhile.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleWhile.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleWhileOut.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleWhileOut.test.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleZerosLike.cpp2
-rw-r--r--compiler/luci/partition/src/Nodes/CircleZerosLike.test.cpp2
-rw-r--r--compiler/luci/partition/src/PartitionIR.cpp2
-rw-r--r--compiler/luci/partition/src/PartitionMerge.cpp2
-rw-r--r--compiler/luci/partition/src/PartitionPGroups.cpp2
-rw-r--r--compiler/luci/partition/src/PartitionPModules.cpp4
-rw-r--r--compiler/luci/pass/CMakeLists.txt8
-rw-r--r--compiler/luci/pass/include/luci/CircleOptimizer.h3
-rw-r--r--compiler/luci/pass/include/luci/Pass/FoldDensifyPass.h38
-rw-r--r--compiler/luci/pass/include/luci/Pass/RemoveRedundantDequantizePass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryReshapeNetPass.h39
-rw-r--r--compiler/luci/pass/include/luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/ResolveCustomOpSplitVPass.h37
-rw-r--r--compiler/luci/pass/src/CircleOptimizer.cpp39
-rw-r--r--compiler/luci/pass/src/CircleQuantizer.cpp7
-rw-r--r--compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp329
-rw-r--r--compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp525
-rw-r--r--compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp57
-rw-r--r--compiler/luci/pass/src/FoldDensifyPass.cpp180
-rw-r--r--compiler/luci/pass/src/FoldDensifyPass.test.cpp158
-rw-r--r--compiler/luci/pass/src/FoldDequantizePass.cpp96
-rw-r--r--compiler/luci/pass/src/FoldDequantizePass.test.cpp377
-rw-r--r--compiler/luci/pass/src/FoldSparseToDensePass.cpp2
-rw-r--r--compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.cpp49
-rw-r--r--compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.test.cpp86
-rw-r--r--compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp6
-rw-r--r--compiler/luci/pass/src/FuseAddWithTConvPass.cpp20
-rw-r--r--compiler/luci/pass/src/FuseBatchNormWithTConvPass.cpp53
-rw-r--r--compiler/luci/pass/src/FuseInstanceNormPass.cpp186
-rw-r--r--compiler/luci/pass/src/PropagateQParamBackwardPass.cpp1
-rw-r--r--compiler/luci/pass/src/PropagateQParamForwardPass.cpp9
-rw-r--r--compiler/luci/pass/src/QuantizationUtils.cpp126
-rw-r--r--compiler/luci/pass/src/QuantizationUtils.h16
-rw-r--r--compiler/luci/pass/src/QuantizeActivation.cpp11
-rw-r--r--compiler/luci/pass/src/QuantizeBias.cpp14
-rw-r--r--compiler/luci/pass/src/QuantizeBias.test.cpp189
-rw-r--r--compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp7
-rw-r--r--compiler/luci/pass/src/QuantizeWeights.cpp1
-rw-r--r--compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp91
-rw-r--r--compiler/luci/pass/src/QuantizedModelVerifier.test.cpp53
-rw-r--r--compiler/luci/pass/src/RemoveRedundantDequantizePass.cpp80
-rw-r--r--compiler/luci/pass/src/RemoveRedundantDequantizePass.test.cpp114
-rw-r--r--compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.cpp172
-rw-r--r--compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.test.cpp123
-rw-r--r--compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.cpp196
-rw-r--r--compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.test.cpp189
-rw-r--r--compiler/luci/pass/src/ResolveCustomOpSplitVPass.cpp172
-rw-r--r--compiler/luci/pass/src/ResolveCustomOpSplitVPass.test.cpp175
-rw-r--r--compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h7
-rw-r--r--compiler/luci/pass/src/VerifyQuantizedNodeType.cpp9
-rw-r--r--compiler/luci/pass/src/VerifyQuantizedNodeType.h1
-rw-r--r--compiler/luci/pass/src/helpers/SparsityFormatConverter.cpp312
-rw-r--r--compiler/luci/pass/src/helpers/SparsityFormatConverter.h129
-rw-r--r--compiler/luci/requires.cmake1
-rw-r--r--compiler/luci/service/src/CircleCloneNode.h1
-rw-r--r--compiler/luci/service/src/CircleShapeInferenceRule.cpp103
-rw-r--r--compiler/luci/service/src/CircleTypeInferenceRule.cpp5
-rw-r--r--compiler/luci/service/src/Nodes/CircleDensify.cpp27
-rw-r--r--compiler/luci/service/src/Nodes/CircleDensify.test.cpp33
-rw-r--r--compiler/luci/service/src/ShapeInfer_StridedSlice.cpp261
-rw-r--r--compiler/luci/tests/test.lst2
-rw-r--r--compiler/mio-circle04/include/mio_circle/Helper.h17
-rw-r--r--compiler/mio-circle04/include/mio_circle/Reader.h101
-rw-r--r--compiler/mio-circle04/src/Reader.cpp147
-rw-r--r--compiler/mio-circle04/src/Reader.test.cpp60
-rw-r--r--compiler/mio-tflite/README.md2
-rw-r--r--compiler/mio-tflite260/README.md2
-rw-r--r--compiler/mir/include/mir/Graph.h4
-rw-r--r--compiler/mir/src/Graph.cpp9
-rw-r--r--compiler/mir2loco/src/mir2loco.test.cpp49
-rw-r--r--compiler/moco/import/src/Importer.cpp2
-rw-r--r--compiler/moco/lang/src/IR/TFNode.cpp1
-rw-r--r--compiler/one-cmds/CMakeLists.txt8
-rw-r--r--compiler/one-cmds/dummy-driver/CMakeLists.txt27
-rw-r--r--compiler/one-cmds/dummy-driver/src/dummy-infer.cpp34
-rw-r--r--compiler/one-cmds/dummy-driver/src/dummy-inferV2.cpp34
-rw-r--r--compiler/one-cmds/dummy-driver/src/help-infer.cpp42
-rw-r--r--compiler/one-cmds/how-to-use-one-commands.txt5
-rw-r--r--compiler/one-cmds/one-build5
-rw-r--r--compiler/one-cmds/one-build.template.cfg1
-rw-r--r--compiler/one-cmds/one-codegen2
-rw-r--r--compiler/one-cmds/one-import-bcq11
-rw-r--r--compiler/one-cmds/one-import-onnx83
-rw-r--r--compiler/one-cmds/one-import-pytorch7
-rw-r--r--compiler/one-cmds/one-import-tf10
-rw-r--r--compiler/one-cmds/one-import-tflite5
-rw-r--r--compiler/one-cmds/one-infer224
-rw-r--r--compiler/one-cmds/one-init280
-rw-r--r--compiler/one-cmds/one-optimize13
-rw-r--r--compiler/one-cmds/one-pack2
-rw-r--r--compiler/one-cmds/one-partition130
-rw-r--r--compiler/one-cmds/one-prepare-venv10
-rw-r--r--compiler/one-cmds/one-profile2
-rw-r--r--compiler/one-cmds/one-quantize211
-rw-r--r--compiler/one-cmds/onecc105
-rw-r--r--compiler/one-cmds/onecc.template.cfg144
-rw-r--r--compiler/one-cmds/onelib/CfgRunner.py99
-rw-r--r--compiler/one-cmds/onelib/OptionBuilder.py95
-rw-r--r--compiler/one-cmds/onelib/TopologicalSortHelper.py45
-rw-r--r--compiler/one-cmds/onelib/WorkflowRunner.py131
-rw-r--r--compiler/one-cmds/onelib/constant.py7
-rw-r--r--compiler/one-cmds/onelib/make_cmd.py5
-rwxr-xr-xcompiler/one-cmds/onnx_legalizer.py59
-rw-r--r--compiler/one-cmds/requires.cmake1
-rw-r--r--compiler/one-cmds/tests/CMakeLists.txt12
-rw-r--r--compiler/one-cmds/tests/OONECC_024.cfg2
-rw-r--r--compiler/one-cmds/tests/one-build_008.cfg1
-rw-r--r--compiler/one-cmds/tests/one-build_009.cfg1
-rw-r--r--compiler/one-cmds/tests/one-import-onnx_002.test71
-rw-r--r--compiler/one-cmds/tests/one-infer-test-post-process.py16
-rw-r--r--compiler/one-cmds/tests/one-infer_001.test42
-rw-r--r--compiler/one-cmds/tests/one-infer_002.test48
-rw-r--r--compiler/one-cmds/tests/one-infer_003.test48
-rw-r--r--compiler/one-cmds/tests/one-infer_004.test38
-rw-r--r--compiler/one-cmds/tests/one-infer_005.cfg3
-rw-r--r--compiler/one-cmds/tests/one-infer_005.test51
-rw-r--r--compiler/one-cmds/tests/one-infer_006.test53
-rw-r--r--compiler/one-cmds/tests/one-infer_neg_001.test39
-rw-r--r--compiler/one-cmds/tests/one-infer_neg_002.test40
-rw-r--r--compiler/one-cmds/tests/one-infer_neg_003.test40
-rw-r--r--compiler/one-cmds/tests/one-infer_neg_004.test41
-rw-r--r--compiler/one-cmds/tests/one-infer_neg_005.test54
-rw-r--r--compiler/one-cmds/tests/one-optimize_001.test2
-rw-r--r--compiler/one-cmds/tests/one-optimize_002.test2
-rw-r--r--compiler/one-cmds/tests/one-optimize_neg_001.test2
-rw-r--r--compiler/one-cmds/tests/one-optimize_neg_002.test2
-rw-r--r--compiler/one-cmds/tests/one-optimize_neg_003.test2
-rw-r--r--compiler/one-cmds/tests/one-optimize_neg_004.test2
-rw-r--r--compiler/one-cmds/tests/one-partition_001.test46
-rw-r--r--compiler/one-cmds/tests/one-partition_neg_001.test51
-rw-r--r--compiler/one-cmds/tests/one-partition_neg_002.test47
-rw-r--r--compiler/one-cmds/tests/one-quantize_010.test65
-rw-r--r--compiler/one-cmds/tests/one-quantize_011.test56
-rw-r--r--compiler/one-cmds/tests/one-quantize_012.qconf.json16
-rw-r--r--compiler/one-cmds/tests/one-quantize_012.test46
-rw-r--r--compiler/one-cmds/tests/one-quantize_013.qconf.json16
-rw-r--r--compiler/one-cmds/tests/one-quantize_013.test48
-rw-r--r--compiler/one-cmds/tests/one-quantize_014.test59
-rw-r--r--compiler/one-cmds/tests/one-quantize_015.test45
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_019.test2
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_020.test48
-rw-r--r--compiler/one-cmds/tests/onecc_008.cfg1
-rw-r--r--compiler/one-cmds/tests/onecc_009.cfg1
-rw-r--r--compiler/one-cmds/tests/onecc_024.cfg22
-rw-r--r--compiler/one-cmds/tests/onecc_024.test77
-rw-r--r--compiler/one-cmds/tests/onecc_025.cfg20
-rw-r--r--compiler/one-cmds/tests/onecc_025.test40
-rw-r--r--compiler/one-cmds/tests/onecc_026.cfg16
-rw-r--r--compiler/one-cmds/tests/onecc_026.test46
-rw-r--r--compiler/one-cmds/tests/onecc_027.cfg15
-rw-r--r--compiler/one-cmds/tests/onecc_027.test46
-rw-r--r--compiler/one-cmds/tests/onecc_028.test42
-rw-r--r--compiler/one-cmds/tests/onecc_028.workflow.json37
-rw-r--r--compiler/one-cmds/tests/onecc_029.test42
-rw-r--r--compiler/one-cmds/tests/onecc_029.workflow.json30
-rw-r--r--compiler/one-cmds/tests/onecc_030.test48
-rw-r--r--compiler/one-cmds/tests/onecc_030.workflow.json29
-rw-r--r--compiler/one-cmds/tests/onecc_031.test48
-rw-r--r--compiler/one-cmds/tests/onecc_031.workflow.json33
-rw-r--r--compiler/one-cmds/tests/onecc_032.test48
-rw-r--r--compiler/one-cmds/tests/onecc_032.workflow.json42
-rw-r--r--compiler/one-cmds/tests/onecc_033.test42
-rw-r--r--compiler/one-cmds/tests/onecc_033.workflow.json42
-rw-r--r--compiler/one-cmds/tests/onecc_034.test48
-rw-r--r--compiler/one-cmds/tests/onecc_034.workflow.json35
-rw-r--r--compiler/one-cmds/tests/onecc_035.test47
-rw-r--r--compiler/one-cmds/tests/onecc_035.workflow.json22
-rw-r--r--compiler/one-cmds/tests/onecc_036.test47
-rw-r--r--compiler/one-cmds/tests/onecc_036.workflow.json18
-rw-r--r--compiler/one-cmds/tests/onecc_037.test42
-rw-r--r--compiler/one-cmds/tests/onecc_037.workflow.json29
-rw-r--r--compiler/one-cmds/tests/onecc_038.test42
-rw-r--r--compiler/one-cmds/tests/onecc_038.workflow.json31
-rw-r--r--compiler/one-cmds/tests/onecc_039.test48
-rw-r--r--compiler/one-cmds/tests/onecc_039.workflow.json21
-rw-r--r--compiler/one-cmds/tests/onecc_040.cfg20
-rw-r--r--compiler/one-cmds/tests/onecc_040.test42
-rw-r--r--compiler/one-cmds/tests/onecc_040.workflow.json10
-rw-r--r--compiler/one-cmds/tests/onecc_041.cfg16
-rw-r--r--compiler/one-cmds/tests/onecc_041.test58
-rw-r--r--compiler/one-cmds/tests/onecc_041.workflow.json61
-rw-r--r--compiler/one-cmds/tests/onecc_neg_009.test69
-rw-r--r--compiler/one-cmds/tests/onecc_neg_010.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_011.cfg13
-rw-r--r--compiler/one-cmds/tests/onecc_neg_011.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_012.cfg15
-rw-r--r--compiler/one-cmds/tests/onecc_neg_012.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_013.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_014.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_014.workflow.json3
-rw-r--r--compiler/one-cmds/tests/onecc_neg_015.test42
-rw-r--r--compiler/one-cmds/tests/onecc_neg_015.workflow.json21
-rw-r--r--compiler/one-cmds/tests/onecc_neg_016.test42
-rw-r--r--compiler/one-cmds/tests/onecc_neg_016.workflow.json21
-rw-r--r--compiler/one-cmds/tests/onecc_neg_017.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_017.workflow.json18
-rw-r--r--compiler/one-cmds/tests/onecc_neg_018.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_018.workflow.json24
-rw-r--r--compiler/one-cmds/tests/onecc_neg_019.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_019.workflow.json21
-rw-r--r--compiler/one-cmds/tests/onecc_neg_020.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_020.workflow.json21
-rw-r--r--compiler/one-cmds/tests/onecc_neg_021.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_021.workflow.json44
-rw-r--r--compiler/one-cmds/tests/onecc_neg_022.cfg16
-rw-r--r--compiler/one-cmds/tests/onecc_neg_022.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_022.workflow.json63
-rw-r--r--compiler/one-cmds/tests/onecc_neg_023.test41
-rw-r--r--compiler/one-cmds/tests/onecc_neg_023.workflow.json30
-rw-r--r--compiler/one-cmds/tests/prepare_test_materials.sh14
-rw-r--r--compiler/one-cmds/utils.py59
-rw-r--r--compiler/onnx-tools/CMakeLists.txt6
-rw-r--r--compiler/pota-quantization-value-test/CMakeLists.txt4
-rw-r--r--compiler/record-minmax-conversion-test/CMakeLists.txt4
-rw-r--r--compiler/record-minmax/driver/Driver.cpp39
-rw-r--r--compiler/record-minmax/include/RecordFunction.h2
-rw-r--r--compiler/record-minmax/src/MinMaxObserver.cpp3
-rw-r--r--compiler/record-minmax/src/RecordMinMax.cpp8
-rw-r--r--compiler/souschef/CMakeLists.txt7
-rw-r--r--compiler/souschef/include/souschef/Data/Explicit.h35
-rw-r--r--compiler/souschef/include/souschef/Data/Gaussian.h21
-rw-r--r--compiler/souschef/src/Explicit.cpp21
-rw-r--r--compiler/souschef/src/Gaussian.cpp45
-rw-r--r--compiler/tf2circle-conversion-test/CMakeLists.txt4
-rw-r--r--compiler/tf2circle-dredd-pb-test/CMakeLists.txt4
-rw-r--r--compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt4
-rw-r--r--compiler/tf2circle-model-test/CMakeLists.txt4
-rw-r--r--compiler/tf2tflite-dredd-pb-test/CMakeLists.txt4
-rw-r--r--compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt4
-rw-r--r--compiler/tf2tflite-value-pb-test/CMakeLists.txt4
-rw-r--r--compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt4
-rw-r--r--compiler/tf2tfliteV2-conversion-test/CMakeLists.txt4
-rwxr-xr-xcompiler/tf2tfliteV2/tf2tfliteV2.py9
-rw-r--r--compiler/tfl-inspect/CMakeLists.txt4
-rw-r--r--compiler/tfl-inspect/driver/Driver.cpp2
-rw-r--r--compiler/tfl-verify/CMakeLists.txt4
-rw-r--r--compiler/tfl-verify/src/Driver.cpp2
-rw-r--r--compiler/tflchef/CMakeLists.txt5
-rw-r--r--compiler/tflchef/core/src/Convert.cpp222
-rw-r--r--compiler/tflchef/core/src/Convert.h49
-rw-r--r--compiler/tflchef/core/src/DataChef.def4
-rw-r--r--compiler/tflchef/core/src/ModelChef.cpp167
-rw-r--r--compiler/tflchef/core/src/Op/Densify.cpp29
-rw-r--r--compiler/tflchef/core/src/Op/Densify.h46
-rw-r--r--compiler/tflchef/core/src/OpChef.def1
-rw-r--r--compiler/tflchef/core/src/OpChefs.h1
-rw-r--r--compiler/tflchef/proto/tflchef.proto12
-rw-r--r--compiler/tflchef/tests/make_sparse/test.recipe44
-rw-r--r--compiler/tflchef/tests/make_sparse_f16/test.recipe54
-rw-r--r--compiler/tflchef/tflite/CMakeLists.txt1
-rw-r--r--compiler/tflchef/tflite/src/Convert.cpp3
-rw-r--r--compiler/tflchef/tflite/src/FillerHelper.cpp15
-rw-r--r--compiler/tflchef/tflite/src/FillerHelper.h8
-rw-r--r--compiler/tflchef/tflite/src/Op/Add.cpp6
-rw-r--r--compiler/tflchef/tflite/src/Op/Maximum.cpp6
-rw-r--r--compiler/tflchef/tflite/src/Op/Minimum.cpp6
-rw-r--r--compiler/tflchef/tflite/src/Op/Mul.cpp6
-rw-r--r--compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.cpp2
-rw-r--r--compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp2
-rw-r--r--compiler/tflchef/tflite/src/Op/PadV2.cpp7
-rw-r--r--compiler/tflchef/tflite/src/Op/ScatterNd.cpp6
-rw-r--r--compiler/tflchef/tflite/src/Op/SegmentSum.cpp7
-rw-r--r--compiler/tflchef/tflite/src/Op/Sub.cpp6
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Abs.h (renamed from compiler/tflchef/tflite/src/Op/Abs.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Add.h (renamed from compiler/tflchef/tflite/src/Op/Add.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/AddN.h (renamed from compiler/tflchef/tflite/src/Op/AddN.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ArgMax.h (renamed from compiler/tflchef/tflite/src/Op/ArgMax.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ArgMin.h (renamed from compiler/tflchef/tflite/src/Op/ArgMin.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/AveragePool2D.h (renamed from compiler/tflchef/tflite/src/Op/AveragePool2D.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/BatchMatMul.h (renamed from compiler/tflchef/tflite/src/Op/BatchMatMul.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/BatchToSpaceND.h (renamed from compiler/tflchef/tflite/src/Op/BatchToSpaceND.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/BidirectionalSequenceLSTM.h (renamed from compiler/tflchef/tflite/src/Op/BidirectionalSequenceLSTM.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Cast.h (renamed from compiler/tflchef/tflite/src/Op/Cast.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Ceil.h (renamed from compiler/tflchef/tflite/src/Op/Ceil.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Concatenation.h (renamed from compiler/tflchef/tflite/src/Op/Concatenation.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Conv2D.h (renamed from compiler/tflchef/tflite/src/Op/Conv2D.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Cos.h (renamed from compiler/tflchef/tflite/src/Op/Cos.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/DepthToSpace.h (renamed from compiler/tflchef/tflite/src/Op/DepthToSpace.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/DepthwiseConv2D.h (renamed from compiler/tflchef/tflite/src/Op/DepthwiseConv2D.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Dequantize.h (renamed from compiler/tflchef/tflite/src/Op/Dequantize.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Div.h (renamed from compiler/tflchef/tflite/src/Op/Div.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ELU.h (renamed from compiler/tflchef/tflite/src/Op/ELU.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Equal.h (renamed from compiler/tflchef/tflite/src/Op/Equal.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Exp.h (renamed from compiler/tflchef/tflite/src/Op/Exp.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ExpandDims.h (renamed from compiler/tflchef/tflite/src/Op/ExpandDims.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/FakeQuant.h (renamed from compiler/tflchef/tflite/src/Op/FakeQuant.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Fill.h (renamed from compiler/tflchef/tflite/src/Op/Fill.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Floor.h (renamed from compiler/tflchef/tflite/src/Op/Floor.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/FloorDiv.h (renamed from compiler/tflchef/tflite/src/Op/FloorDiv.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/FloorMod.h (renamed from compiler/tflchef/tflite/src/Op/FloorMod.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/FullyConnected.h (renamed from compiler/tflchef/tflite/src/Op/FullyConnected.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Gather.h (renamed from compiler/tflchef/tflite/src/Op/Gather.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/GatherNd.h (renamed from compiler/tflchef/tflite/src/Op/GatherNd.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Greater.h (renamed from compiler/tflchef/tflite/src/Op/Greater.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/GreaterEqual.h (renamed from compiler/tflchef/tflite/src/Op/GreaterEqual.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/L2Normalize.h (renamed from compiler/tflchef/tflite/src/Op/L2Normalize.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/L2Pool2D.h (renamed from compiler/tflchef/tflite/src/Op/L2Pool2D.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/LeakyRelu.h (renamed from compiler/tflchef/tflite/src/Op/LeakyRelu.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Less.h (renamed from compiler/tflchef/tflite/src/Op/Less.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/LessEqual.h (renamed from compiler/tflchef/tflite/src/Op/LessEqual.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/LocalResponseNormalization.h (renamed from compiler/tflchef/tflite/src/Op/LocalResponseNormalization.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Log.h (renamed from compiler/tflchef/tflite/src/Op/Log.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/LogSoftmax.h (renamed from compiler/tflchef/tflite/src/Op/LogSoftmax.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/LogicalAnd.h (renamed from compiler/tflchef/tflite/src/Op/LogicalAnd.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/LogicalNot.h (renamed from compiler/tflchef/tflite/src/Op/LogicalNot.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/LogicalOr.h (renamed from compiler/tflchef/tflite/src/Op/LogicalOr.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Logistic.h (renamed from compiler/tflchef/tflite/src/Op/Logistic.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/MatrixDiag.h (renamed from compiler/tflchef/tflite/src/Op/MatrixDiag.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/MatrixSetDiag.h (renamed from compiler/tflchef/tflite/src/Op/MatrixSetDiag.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/MaxPool2D.h (renamed from compiler/tflchef/tflite/src/Op/MaxPool2D.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Maximum.h (renamed from compiler/tflchef/tflite/src/Op/Maximum.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Mean.h (renamed from compiler/tflchef/tflite/src/Op/Mean.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Minimum.h (renamed from compiler/tflchef/tflite/src/Op/Minimum.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/MirrorPad.h (renamed from compiler/tflchef/tflite/src/Op/MirrorPad.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Mul.h (renamed from compiler/tflchef/tflite/src/Op/Mul.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Neg.h (renamed from compiler/tflchef/tflite/src/Op/Neg.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV4.h (renamed from compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV5.h (renamed from compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/NotEqual.h (renamed from compiler/tflchef/tflite/src/Op/NotEqual.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/OneHot.h (renamed from compiler/tflchef/tflite/src/Op/OneHot.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/PRelu.h (renamed from compiler/tflchef/tflite/src/Op/PRelu.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Pack.h (renamed from compiler/tflchef/tflite/src/Op/Pack.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Pad.h (renamed from compiler/tflchef/tflite/src/Op/Pad.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/PadV2.h (renamed from compiler/tflchef/tflite/src/Op/PadV2.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Pow.h (renamed from compiler/tflchef/tflite/src/Op/Pow.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Quantize.h (renamed from compiler/tflchef/tflite/src/Op/Quantize.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Range.h (renamed from compiler/tflchef/tflite/src/Op/Range.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Rank.h (renamed from compiler/tflchef/tflite/src/Op/Rank.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReLU.h (renamed from compiler/tflchef/tflite/src/Op/ReLU.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReLU6.h (renamed from compiler/tflchef/tflite/src/Op/ReLU6.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReLUN1To1.h (renamed from compiler/tflchef/tflite/src/Op/ReLUN1To1.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReduceAny.h (renamed from compiler/tflchef/tflite/src/Op/ReduceAny.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReduceMax.h (renamed from compiler/tflchef/tflite/src/Op/ReduceMax.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReduceMin.h (renamed from compiler/tflchef/tflite/src/Op/ReduceMin.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReduceProd.h (renamed from compiler/tflchef/tflite/src/Op/ReduceProd.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Reshape.h (renamed from compiler/tflchef/tflite/src/Op/Reshape.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ResizeBilinear.h (renamed from compiler/tflchef/tflite/src/Op/ResizeBilinear.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ResizeNearestNeighbor.h (renamed from compiler/tflchef/tflite/src/Op/ResizeNearestNeighbor.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReverseSequence.h (renamed from compiler/tflchef/tflite/src/Op/ReverseSequence.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ReverseV2.h (renamed from compiler/tflchef/tflite/src/Op/ReverseV2.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Round.h (renamed from compiler/tflchef/tflite/src/Op/Round.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Rsqrt.h (renamed from compiler/tflchef/tflite/src/Op/Rsqrt.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SVDF.h (renamed from compiler/tflchef/tflite/src/Op/SVDF.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ScatterNd.h (renamed from compiler/tflchef/tflite/src/Op/ScatterNd.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SegmentSum.h (renamed from compiler/tflchef/tflite/src/Op/SegmentSum.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Select.h (renamed from compiler/tflchef/tflite/src/Op/Select.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SelectV2.h (renamed from compiler/tflchef/tflite/src/Op/SelectV2.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Shape.h (renamed from compiler/tflchef/tflite/src/Op/Shape.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Sin.h (renamed from compiler/tflchef/tflite/src/Op/Sin.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Slice.h (renamed from compiler/tflchef/tflite/src/Op/Slice.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Softmax.h (renamed from compiler/tflchef/tflite/src/Op/Softmax.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SpaceToBatchND.h (renamed from compiler/tflchef/tflite/src/Op/SpaceToBatchND.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SpaceToDepth.h (renamed from compiler/tflchef/tflite/src/Op/SpaceToDepth.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SparseToDense.h (renamed from compiler/tflchef/tflite/src/Op/SparseToDense.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Split.h (renamed from compiler/tflchef/tflite/src/Op/Split.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SplitV.h (renamed from compiler/tflchef/tflite/src/Op/SplitV.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Sqrt.h (renamed from compiler/tflchef/tflite/src/Op/Sqrt.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Square.h (renamed from compiler/tflchef/tflite/src/Op/Square.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/SquaredDifference.h (renamed from compiler/tflchef/tflite/src/Op/SquaredDifference.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Squeeze.h (renamed from compiler/tflchef/tflite/src/Op/Squeeze.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/StridedSlice.h (renamed from compiler/tflchef/tflite/src/Op/StridedSlice.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Sub.h (renamed from compiler/tflchef/tflite/src/Op/Sub.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Sum.h (renamed from compiler/tflchef/tflite/src/Op/Sum.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Tanh.h (renamed from compiler/tflchef/tflite/src/Op/Tanh.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Tile.h (renamed from compiler/tflchef/tflite/src/Op/Tile.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/TopKV2.h (renamed from compiler/tflchef/tflite/src/Op/TopKV2.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Transpose.h (renamed from compiler/tflchef/tflite/src/Op/Transpose.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/TransposeConv.h (renamed from compiler/tflchef/tflite/src/Op/TransposeConv.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/UnidirectionalSequenceLSTM.h (renamed from compiler/tflchef/tflite/src/Op/UnidirectionalSequenceLSTM.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Unique.h (renamed from compiler/tflchef/tflite/src/Op/Unique.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Unpack.h (renamed from compiler/tflchef/tflite/src/Op/Unpack.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/Where.h (renamed from compiler/tflchef/tflite/src/Op/Where.h)0
-rw-r--r--compiler/tflchef/tflite/src/Op/include/ZerosLike.h (renamed from compiler/tflchef/tflite/src/Op/ZerosLike.h)0
-rw-r--r--compiler/tflchef/tflite/src/TFliteOpChefs.h220
-rw-r--r--compiler/tflchef/tools/file/Driver.cpp6
-rw-r--r--compiler/tflchef/tools/reverse/Driver.cpp6
-rw-r--r--compiler/tfldump/CMakeLists.txt1
-rw-r--r--compiler/tfldump/driver/Driver.cpp15
-rw-r--r--compiler/tfldump/include/tflread/Model.h43
-rw-r--r--compiler/tfldump/requires.cmake1
-rw-r--r--compiler/tfldump/src/Dump.cpp6
-rw-r--r--compiler/tfldump/src/Load.cpp133
-rw-r--r--compiler/tfldump/src/OpPrinter.cpp1
-rw-r--r--compiler/tflite2circle-conversion-test/CMakeLists.txt4
-rw-r--r--compiler/tflite2circle/driver/Driver.cpp23
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions.h2
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.cpp29
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.cpp30
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.h31
-rw-r--r--compiler/tflite2circle/src/BuildBuiltinOptions/MaximumMinimumOptions.cpp2
-rw-r--r--compiler/tflite2circle/src/CircleModel.cpp9
-rw-r--r--compiler/tflite2circle/src/TFLBuiltinOptions.lst3
-rw-r--r--compiler/vconone/CMakeLists.txt2
-rw-r--r--compiler/vconone/src/version.cpp2
-rw-r--r--compute/ARMComputeEx/CMakeLists.txt2
-rw-r--r--compute/cker/CMakeLists.txt17
-rw-r--r--compute/cker/include/cker/CpuBackendThreadpool.h7
-rw-r--r--compute/cker/include/cker/NeonTensorUtils.h2
-rw-r--r--compute/cker/include/cker/operation/Conv.h30
-rw-r--r--compute/cker/include/cker/operation/DepthwiseConv.h1
-rw-r--r--compute/cker/include/cker/operation/reference/Conv.h59
-rw-r--r--compute/cker/include/cker/operation/reference/integer_ops/DepthwiseConvUInt8.h152
-rw-r--r--compute/cker/include/cker/ruy/RuySupport.h34
-rw-r--r--compute/cker/src/Range.test.cc (renamed from compute/test/cker/Range.cc)0
-rw-r--r--compute/ruy/include/ruy/RuySupport.h43
-rw-r--r--compute/ruy/include/ruy/operation/Conv.h2
-rw-r--r--compute/ruy/include/ruy/operation/FullyConnected.h2
-rw-r--r--compute/test/CMakeLists.txt17
-rw-r--r--docs/conf.py2
-rw-r--r--docs/howto/how-to-build-runtime-tizen-gbs-rpi4.md18
-rw-r--r--docs/release/1.20/index.rst13
-rw-r--r--docs/release/1.20/release-note-1.20.0.md34
-rw-r--r--docs/release/1.21/index.rst13
-rw-r--r--docs/release/1.21/release-note_1.21.0.md35
-rw-r--r--infra/cmake/modules/IdentifyPlatform.cmake4
-rw-r--r--infra/cmake/packages/AbseilConfig.cmake14
-rw-r--r--infra/cmake/packages/AbseilSourceConfig.cmake7
-rw-r--r--infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/CaffeSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/CpuInfoSourceConfig.cmake4
-rw-r--r--infra/cmake/packages/Egl_HeadersSourceConfig.cmake21
-rw-r--r--infra/cmake/packages/FarmhashSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/Fp16SourceConfig.cmake2
-rw-r--r--infra/cmake/packages/GEMMLowpSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/GFlagsSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/GTestSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/HDF5SourceConfig.cmake3
-rw-r--r--infra/cmake/packages/JsoncppSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/NEON2SSESourceConfig.cmake8
-rw-r--r--infra/cmake/packages/ONNXSource-1.4.1/ONNXSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/ONNXSource-1.6.0/ONNXSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/OouraFFTSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/Opengl_HeadersSourceConfig.cmake21
-rw-r--r--infra/cmake/packages/ProtobufSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/Pybind11SourceConfig.cmake3
-rw-r--r--infra/cmake/packages/PytorchSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowEigenSource-2.1.0/TensorFlowEigenSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfig.cmake21
-rw-r--r--infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfigVersion.cmake10
-rw-r--r--infra/cmake/packages/TensorFlowGEMMLowpSource-2.1.0/TensorFlowGEMMLowpSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowGpuSourceConfig.cmake5
-rw-r--r--infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfig.cmake21
-rw-r--r--infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfigVersion.cmake10
-rw-r--r--infra/cmake/packages/TensorFlowSource-1.14/TensorFlowSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowSource-2.2.0/TensorFlowSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowSource-2.3.0-rc0Config.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowSource-2.3.0/TensorFlowSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake3
-rw-r--r--infra/cmake/packages/VulkanSourceConfig.cmake20
-rw-r--r--infra/command/format6
-rw-r--r--infra/command/gen-coverage-report4
-rw-r--r--infra/debian/compiler/changelog47
-rw-r--r--infra/debian/compiler/docs/one-infer.146
-rw-r--r--infra/debian/compiler/docs/one-partition.156
-rw-r--r--infra/debian/compiler/one-compiler.install9
-rw-r--r--infra/debian/compiler/one-compiler.manpages2
-rw-r--r--infra/debian/runtime/changelog15
-rwxr-xr-xinfra/debian/runtime/rules2
-rw-r--r--infra/docker/bionic/Dockerfile2
-rw-r--r--infra/docker/focal/Dockerfile2
-rw-r--r--infra/nncc/CMakeLists.txt5
-rw-r--r--infra/nncc/cmake/options/options_armv7em-generic.cmake3
-rw-r--r--infra/nnfw/CMakeLists.txt6
-rw-r--r--infra/nnfw/cmake/ApplyCompileFlags.cmake10
-rw-r--r--infra/nnfw/cmake/CfgOptionFlags.cmake5
-rw-r--r--infra/nnfw/cmake/buildtool/config/config_aarch64-android.cmake3
-rw-r--r--infra/nnfw/cmake/buildtool/config/config_linux.cmake13
-rw-r--r--infra/nnfw/cmake/buildtool/config/config_x86_64-darwin.cmake3
-rw-r--r--infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-linux.cmake6
-rw-r--r--infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-tizen.cmake6
-rw-r--r--infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-linux.cmake6
-rw-r--r--infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-tizen.cmake10
-rw-r--r--infra/nnfw/cmake/options/options_aarch64-android.cmake2
-rw-r--r--infra/nnfw/cmake/options/options_armv7l-tizen.cmake1
-rw-r--r--infra/nnfw/cmake/options/options_x86_64-tizen.cmake1
-rw-r--r--infra/nnfw/cmake/packages/ARMComputeConfig.cmake8
-rw-r--r--infra/nnfw/cmake/packages/CpuInfoConfig.cmake16
-rw-r--r--infra/nnfw/cmake/packages/GLib2.0Config.cmake41
-rw-r--r--infra/nnfw/cmake/packages/Ruy/CMakeLists.txt5
-rw-r--r--infra/nnfw/cmake/packages/RuyConfig.cmake17
-rw-r--r--infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLite/CMakeLists.txt6
-rw-r--r--infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLite/CMakeLists.txt96
-rw-r--r--infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLiteConfig.cmake44
-rw-r--r--infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLiteConfigVersion.cmake9
-rw-r--r--infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLite/CMakeLists.txt121
-rw-r--r--infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfig.cmake50
-rw-r--r--infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfigVersion.cmake9
-rw-r--r--infra/nnfw/config/gbs.conf11
-rw-r--r--infra/packaging/preset/2022032313
-rw-r--r--infra/packaging/preset/20220323_windows14
-rw-r--r--infra/packaging/res/tf2nnpkg.202203232
-rw-r--r--infra/scripts/compiler_modules.sh10
-rwxr-xr-xinfra/scripts/docker_build_nncc.sh4
-rwxr-xr-xinfra/scripts/docker_build_test_x64.sh4
-rwxr-xr-xinfra/scripts/docker_collect_nnpkg_resources.sh6
-rwxr-xr-xinfra/scripts/test_ubuntu_runtime_mixed.sh4
-rwxr-xr-xinfra/scripts/unittest_compiler_xml.sh11
-rw-r--r--nnpackage/examples/README.md7
-rw-r--r--nnpackage/examples/v1.3.0/two_tflites/README.md28
-rw-r--r--nnpackage/examples/v1.3.0/two_tflites/metadata/MANIFEST11
-rw-r--r--nnpackage/examples/v1.3.0/two_tflites/metadata/tc/expected.h5bin0 -> 1614584 bytes
-rw-r--r--nnpackage/examples/v1.3.0/two_tflites/metadata/tc/input.h5bin0 -> 611064 bytes
-rw-r--r--nnpackage/examples/v1.3.0/two_tflites/mv1.0.tflitebin0 -> 4276 bytes
-rw-r--r--nnpackage/examples/v1.3.0/two_tflites/mv1.1.tflitebin0 -> 2024 bytes
-rw-r--r--nnpackage/schema/circle_schema.fbs173
-rw-r--r--packaging/ABSEIL.tar.gzbin1702946 -> 1909045 bytes
-rw-r--r--packaging/CPUINFO.tar.gzbin3476406 -> 136288 bytes
-rw-r--r--packaging/FP16.tar.gzbin71362 -> 70160 bytes
-rw-r--r--packaging/RUY.tar.gzbin235110 -> 0 bytes
-rw-r--r--packaging/TENSORFLOW-2.8.0-RUY.tar.gzbin0 -> 290633 bytes
-rw-r--r--packaging/nnfw.spec119
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_000/test.qconf.json11
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_000/test.recipe43
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_000/test.reverse0
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_000/test.rule13
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_001/test.qconf.json11
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_001/test.recipe43
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_001/test.reverse0
-rw-r--r--res/CircleRecipes/Quant_InstanceNorm_001/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/ArgMax_004/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/ArgMax_004/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Densify_000/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_007/test.recipe29
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_007/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/FullyConnected_007/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/Net_Densify_Add_000/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/Net_Densify_Dequantize_Add_000/test.recipe54
-rw-r--r--res/TensorFlowLiteRecipes/Net_Dequantize_Add_000/test.recipe41
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_Add_000/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_Add_001/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_Add_002/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.recipe26
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_001/test.recipe26
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_002/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.recipe135
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.recipe149
-rw-r--r--res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.rule7
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_001/test.recipe31
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_001/test.rule12
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_002/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_002/test.recipe31
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Add_002/test.rule12
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.recipe24
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.recipe24
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.recipe28
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.recipe28
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.recipe28
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.recipe28
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_000/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_000/test.rule10
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_001/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_002/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_002/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_003/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_003/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_003/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_003/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_004/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_004/test.recipe44
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_004/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Conv_004/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.recipe49
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.recipe49
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.recipe55
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.recipe55
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_000/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_001/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Logistic_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.recipe24
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.recipe24
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_000/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_001/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mean_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mul_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mul_000/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mul_000/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mul_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mul_001/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Mul_001/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_000/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_001/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Neg_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_000/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_000/test.rule12
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_001/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_PRelu_001/test.rule12
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_000/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_001/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Pad_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_000/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_001/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ReLU_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_000/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_001/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Reshape_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.recipe30
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_000/test.recipe37
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_001/test.recipe37
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Slice_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_000/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_001/test.recipe20
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Softmax_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_000/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_001/test.recipe17
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Tanh_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.recipe54
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.recipe54
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.rule13
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_000/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_000/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_000/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_000/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_001/test.qconf.json11
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_001/test.recipe27
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_001/test.reverse0
-rw-r--r--res/TensorFlowLiteRecipes/Quant_Transpose_001/test.rule11
-rw-r--r--res/TensorFlowLiteRecipes/StridedSlice_004/test.recipe46
-rw-r--r--res/TensorFlowLiteRecipes/StridedSlice_004/test.reverse0
-rw-r--r--res/TensorFlowPythonExamples/examples/AddV2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/BatchMatMulV2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/Bidirectional_LSTM/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/PadV2/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/abs/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/add/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/add_n/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/argmax/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/argmin/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/atrous_conv2d/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/average_pool_2d/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/batch_normalization/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/batch_to_space/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/biasadd/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/cast/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/ceil/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/concat/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/cond/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/cond_1/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/conv2d_1/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/conv2d_2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/conv2d_transpose/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/cos/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/depth_to_space/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/depthwise_conv2d_1/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/depthwise_conv2d_2/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/div/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/elu/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/exp/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/expand_dims_00/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/expand_dims_01/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/expand_dims_02/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/fill/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/flatten/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/floor/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/floordiv/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/floormod/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/fused_batch_norm/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/gather/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/gather_nd/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/greater/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/greater_equal/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/gru/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/instance_norm/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/l2_normalize/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/leaky_relu/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/less/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/less_equal/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/local_response_normalization/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/log/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/log_softmax/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/log_softmax_2/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/logical_and/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/logical_not/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/logical_or/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/lstm/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/matmul/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/matrix_band_part/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/matrix_diag/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/matrix_set_diag/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/max_pool_with_argmax/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/maximum/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/minimum/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/multiply/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/negative/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/not_equal/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/one_hot/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/pack/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/pad-reflect/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/pad/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/pow/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/prelu/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/range/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/rank/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/reduce_all/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/reduce_any/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/reduce_max/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/reduce_min/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/reduce_prod/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/relu/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/relu6/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/reshape/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/resize_bilinear/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/resize_nearest_neighbor/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/reverse_sequence/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/reverse_v2/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/rnn/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/round/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/rsqrt/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/scatter_nd/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/segment_sum/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/shape/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/sigmoid/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/sin/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/slice/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/softmax/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/space_to_batch/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/space_to_batch_nd/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/space_to_depth/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/sparse_to_dense/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/split/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/split_2/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/sqrt/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/square/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/squared_difference/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/squeeze_1/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/squeeze_2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/strided_slice/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/subtract/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/sum/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/tanh/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/tile/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/top_k/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/unidirectional_sequence_LSTM/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/unique/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/unstack/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/where/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/where_2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/where_v2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/where_v2_2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/while/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/while_2/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/while_3/__init__.py2
-rwxr-xr-xres/TensorFlowPythonExamples/examples/yuv_to_rgb/__init__.py2
-rw-r--r--res/TensorFlowPythonExamples/examples/zeros_like/__init__.py2
-rw-r--r--runtime/contrib/android/api/build.gradle2
-rw-r--r--runtime/libs/misc/CMakeLists.txt19
-rw-r--r--runtime/libs/misc/examples/tensor_index_iterator.cpp74
-rw-r--r--runtime/libs/misc/include/misc/EnvConfigSource.h41
-rw-r--r--runtime/libs/misc/include/misc/GeneralConfigSource.h44
-rw-r--r--runtime/libs/misc/include/misc/IConfigSource.h46
-rw-r--r--runtime/libs/misc/include/misc/string_helpers.h2
-rw-r--r--runtime/libs/misc/src/EnvConfigSource.cpp40
-rw-r--r--runtime/libs/misc/src/GeneralConfigSource.cpp40
-rw-r--r--runtime/libs/misc/src/string_helpers.test.cpp81
-rw-r--r--runtime/libs/misc/src/tensor/IndexEnumerator.test.cpp59
-rw-r--r--runtime/libs/misc/src/tensor/IndexIterator.test.cpp61
-rw-r--r--runtime/libs/ndarray/CMakeLists.txt11
-rw-r--r--runtime/libs/ndarray/include/ndarray/Array.h24
-rw-r--r--runtime/libs/ndarray/src/Array.test.cpp452
-rw-r--r--runtime/libs/ndarray/src/ContiguousSpan.test.cpp198
-rw-r--r--runtime/libs/ndarray/src/detail/cxx14.h67
-rw-r--r--runtime/libs/ndarray/test/CMakeLists.txt18
-rw-r--r--runtime/libs/ndarray/test/ndarray_test.cpp122
-rw-r--r--runtime/onert/CMakeLists.txt6
-rw-r--r--runtime/onert/api/CMakeLists.txt1
-rw-r--r--runtime/onert/api/include/nnfw.h4
-rw-r--r--runtime/onert/api/include/nnfw_version.h2
-rw-r--r--runtime/onert/api/src/nnfw_api.cc10
-rw-r--r--runtime/onert/api/src/nnfw_api_internal.cc267
-rw-r--r--runtime/onert/api/src/nnfw_api_internal.h29
-rw-r--r--runtime/onert/backend/acl_cl/Backend.h4
-rw-r--r--runtime/onert/backend/acl_neon/Backend.h4
-rw-r--r--runtime/onert/backend/cpu/CMakeLists.txt2
-rw-r--r--runtime/onert/backend/cpu/ExternalContext.h2
-rw-r--r--runtime/onert/backend/cpu/KernelGenerator.cc10
-rw-r--r--runtime/onert/backend/cpu/ops/ConvolutionLayer.cc58
-rw-r--r--runtime/onert/backend/cpu/ops/ConvolutionLayer.h5
-rw-r--r--runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc64
-rw-r--r--runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h8
-rw-r--r--runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc4
-rw-r--r--runtime/onert/backend/ruy/ExternalContext.h2
-rw-r--r--runtime/onert/backend/ruy/KernelGenerator.cc10
-rw-r--r--runtime/onert/backend/trix/CMakeLists.txt2
-rw-r--r--runtime/onert/backend/trix/DevContext.h42
-rw-r--r--runtime/onert/backend/trix/ops/BulkLayer.cc105
-rw-r--r--runtime/onert/backend/trix/ops/BulkLayer.h2
-rw-r--r--runtime/onert/backend/xnnpack/KernelGenerator.cc10
-rw-r--r--runtime/onert/core/CMakeLists.txt14
-rw-r--r--runtime/onert/core/include/backend/ITensor.h1
-rw-r--r--runtime/onert/core/include/backend/basic/BackendContextHelpers.h4
-rw-r--r--runtime/onert/core/include/compiler/BackendManager.h13
-rw-r--r--runtime/onert/core/include/compiler/Compiler.h74
-rw-r--r--runtime/onert/core/include/compiler/LoweredGraph.h7
-rw-r--r--runtime/onert/core/include/compiler/StaticShapeInferer.h78
-rw-r--r--runtime/onert/core/include/exec/Execution.h6
-rw-r--r--runtime/onert/core/include/exec/Executors.h71
-rw-r--r--runtime/onert/core/include/exec/FunctionSequence.h3
-rw-r--r--runtime/onert/core/include/exec/IExecutor.h2
-rw-r--r--runtime/onert/core/include/ir/Graph.h29
-rw-r--r--runtime/onert/core/include/ir/Index.h10
-rw-r--r--runtime/onert/core/include/ir/Layout.h1
-rw-r--r--runtime/onert/core/include/ir/Model.h139
-rw-r--r--runtime/onert/core/include/ir/NNPkg.h193
-rw-r--r--runtime/onert/core/include/ir/Subgraphs.h139
-rw-r--r--runtime/onert/core/include/ir/TypeInfo.h6
-rw-r--r--runtime/onert/core/include/ir/operation/Bulk.h2
-rw-r--r--runtime/onert/core/include/util/CalculateActivationRange.h2
-rw-r--r--runtime/onert/core/include/util/Config.lst2
-rw-r--r--runtime/onert/core/include/util/ConfigSource.h10
-rw-r--r--runtime/onert/core/include/util/EnvConfigSource.h41
-rw-r--r--runtime/onert/core/include/util/GeneralConfigSource.h44
-rw-r--r--runtime/onert/core/include/util/IConfigSource.h46
-rw-r--r--runtime/onert/core/include/util/ObjectManager.h13
-rw-r--r--runtime/onert/core/include/util/TracingCtx.h26
-rw-r--r--runtime/onert/core/src/backend/builtin/ExternalContext.h2
-rw-r--r--runtime/onert/core/src/backend/builtin/KernelGenerator.cc32
-rw-r--r--runtime/onert/core/src/backend/builtin/KernelGenerator.h17
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc16
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/IfLayer.h7
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc4
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h6
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc19
-rw-r--r--runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h6
-rw-r--r--runtime/onert/core/src/compiler/BackendManager.cc15
-rw-r--r--runtime/onert/core/src/compiler/Compiler.cc505
-rw-r--r--runtime/onert/core/src/compiler/ExecutorFactory.cc85
-rw-r--r--runtime/onert/core/src/compiler/ExecutorFactory.h26
-rw-r--r--runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc10
-rw-r--r--runtime/onert/core/src/compiler/HEScheduler.cc11
-rw-r--r--runtime/onert/core/src/compiler/HEScheduler.h18
-rw-r--r--runtime/onert/core/src/compiler/HEScheduler.test.cc572
-rw-r--r--runtime/onert/core/src/compiler/Linear.cc10
-rw-r--r--runtime/onert/core/src/compiler/LoweredGraph.cc44
-rw-r--r--runtime/onert/core/src/compiler/ShapeValidator.cc667
-rw-r--r--runtime/onert/core/src/compiler/ShapeValidator.h8
-rw-r--r--runtime/onert/core/src/compiler/StaticShapeInferer.cc648
-rw-r--r--runtime/onert/core/src/compiler/TensorRegistries.h13
-rw-r--r--runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc1
-rw-r--r--runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc18
-rw-r--r--runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc47
-rw-r--r--runtime/onert/core/src/dumper/dot/DotDumper.cc222
-rw-r--r--runtime/onert/core/src/dumper/dot/DotDumper.h25
-rw-r--r--runtime/onert/core/src/exec/DataflowExecutor.h17
-rw-r--r--runtime/onert/core/src/exec/ExecTime.cc6
-rw-r--r--runtime/onert/core/src/exec/ExecTime.test.cc106
-rw-r--r--runtime/onert/core/src/exec/Execution.cc24
-rw-r--r--runtime/onert/core/src/exec/Execution.test.cc302
-rw-r--r--runtime/onert/core/src/exec/ExecutionObservee.h5
-rw-r--r--runtime/onert/core/src/exec/ExecutionObservers.cc14
-rw-r--r--runtime/onert/core/src/exec/ExecutionObservers.h13
-rw-r--r--runtime/onert/core/src/exec/ExecutorBase.cc5
-rw-r--r--runtime/onert/core/src/exec/ExecutorBase.h15
-rw-r--r--runtime/onert/core/src/exec/Executors.cc183
-rw-r--r--runtime/onert/core/src/exec/FunctionSequence.cc4
-rw-r--r--runtime/onert/core/src/exec/JSONExecTime.cc4
-rw-r--r--runtime/onert/core/src/exec/LinearExecutor.h5
-rw-r--r--runtime/onert/core/src/exec/ParallelExecutor.h14
-rw-r--r--runtime/onert/core/src/exec/feature/MockTensor.h66
-rw-r--r--runtime/onert/core/src/exec/feature/nchw/Reader.test.cc85
-rw-r--r--runtime/onert/core/src/exec/feature/nchw/View.test.cc85
-rw-r--r--runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc86
-rw-r--r--runtime/onert/core/src/exec/feature/nhwc/View.h2
-rw-r--r--runtime/onert/core/src/exec/feature/nhwc/View.test.cc86
-rw-r--r--runtime/onert/core/src/interp/InterpExecutor.cc7
-rw-r--r--runtime/onert/core/src/interp/InterpExecutor.h7
-rw-r--r--runtime/onert/core/src/interp/InterpExecutor.test.cc355
-rw-r--r--runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc10
-rw-r--r--runtime/onert/core/src/interp/operations/Concat.cc8
-rw-r--r--runtime/onert/core/src/interp/operations/Conv2D.cc10
-rw-r--r--runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc10
-rw-r--r--runtime/onert/core/src/interp/operations/ElementwiseActivations.cc9
-rw-r--r--runtime/onert/core/src/interp/operations/FullyConnected.cc8
-rw-r--r--runtime/onert/core/src/interp/operations/Gather.cc8
-rw-r--r--runtime/onert/core/src/interp/operations/InstanceNorm.cc8
-rw-r--r--runtime/onert/core/src/interp/operations/Pad.cc6
-rw-r--r--runtime/onert/core/src/interp/operations/Pool2D.cc12
-rw-r--r--runtime/onert/core/src/interp/operations/Reshape.cc2
-rw-r--r--runtime/onert/core/src/interp/operations/Softmax.cc8
-rw-r--r--runtime/onert/core/src/interp/operations/TransposeConv.cc8
-rw-r--r--runtime/onert/core/src/ir/Graph.cc14
-rw-r--r--runtime/onert/core/src/ir/Graph.test.cc147
-rw-r--r--runtime/onert/core/src/ir/LayoutSet.test.cc67
-rw-r--r--runtime/onert/core/src/ir/MockNode.h (renamed from runtime/onert/test/core/ir/MockNode.h)0
-rw-r--r--runtime/onert/core/src/ir/Operand.test.cc86
-rw-r--r--runtime/onert/core/src/ir/OperandIndexSequence.test.cc52
-rw-r--r--runtime/onert/core/src/ir/Operands.test.cc45
-rw-r--r--runtime/onert/core/src/ir/Operation.test.cc98
-rw-r--r--runtime/onert/core/src/ir/Operations.test.cc42
-rw-r--r--runtime/onert/core/src/ir/Shape.test.cc58
-rw-r--r--runtime/onert/core/src/ir/verifier/Verifier.test.cc93
-rw-r--r--runtime/onert/core/src/util/ChromeTracingEventWriter.cc6
-rw-r--r--runtime/onert/core/src/util/ConfigSource.cc25
-rw-r--r--runtime/onert/core/src/util/EnvConfigSource.cc40
-rw-r--r--runtime/onert/core/src/util/EventCollector.cc2
-rw-r--r--runtime/onert/core/src/util/EventCollector.h7
-rw-r--r--runtime/onert/core/src/util/EventRecorder.cc2
-rw-r--r--runtime/onert/core/src/util/EventWriter.cc2
-rw-r--r--runtime/onert/core/src/util/GeneralConfigSource.cc45
-rw-r--r--runtime/onert/core/src/util/Index.test.cc34
-rw-r--r--runtime/onert/core/src/util/MDTableEventWriter.cc10
-rw-r--r--runtime/onert/core/src/util/ObjectManager.test.cc211
-rw-r--r--runtime/onert/core/src/util/SNPEEventWriter.cc5
-rw-r--r--runtime/onert/core/src/util/ShapeInference.test.cc544
-rw-r--r--runtime/onert/frontend/base_loader/include/base_loader.h36
-rw-r--r--runtime/onert/frontend/circle/include/circle_loader.h4
-rw-r--r--runtime/onert/frontend/circle/src/circle_loader.cc16
-rw-r--r--runtime/onert/frontend/nnapi/execution.cc2
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc9
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h21
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h2
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc8
-rw-r--r--runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h4
-rw-r--r--runtime/onert/frontend/tflite/include/tflite_loader.h2
-rw-r--r--runtime/onert/frontend/tflite/src/tflite_loader.cc8
-rw-r--r--runtime/onert/frontend/trix/CMakeLists.txt2
-rw-r--r--runtime/onert/frontend/trix/include/trix_loader.h2
-rw-r--r--runtime/onert/frontend/trix/src/trix_loader.cc32
-rw-r--r--runtime/onert/frontend/trix/src/trix_loader_dummy.cc6
-rw-r--r--runtime/onert/test/CMakeLists.txt15
-rw-r--r--runtime/onert/test/core/compiler/HEScheduler.cc573
-rw-r--r--runtime/onert/test/core/compiler/pass/UnusedOperandEliminationPass.cc45
-rw-r--r--runtime/onert/test/core/exec/ExecInstance.cc301
-rw-r--r--runtime/onert/test/core/exec/ExecTime.test.cc103
-rw-r--r--runtime/onert/test/core/interp/ExecManager.cc360
-rw-r--r--runtime/onert/test/core/ir/Graph.cc148
-rw-r--r--runtime/onert/test/core/ir/LayoutSet.cc67
-rw-r--r--runtime/onert/test/core/ir/OperandIndexSet.cc52
-rw-r--r--runtime/onert/test/core/ir/OperandSet.cc45
-rw-r--r--runtime/onert/test/core/ir/OperationSet.cc41
-rw-r--r--runtime/onert/test/core/ir/SetIO.cc99
-rw-r--r--runtime/onert/test/core/ir/Shape.cc58
-rw-r--r--runtime/onert/test/core/ir/UseDef.cc85
-rw-r--r--runtime/onert/test/core/ir/Verifier.cc92
-rw-r--r--runtime/onert/test/core/util/Index.cc34
-rw-r--r--runtime/onert/test/core/util/ObjectManager.cc211
-rw-r--r--runtime/onert/test/core/util/ShapeInference.cc545
-rw-r--r--runtime/service/CMakeLists.txt1
-rw-r--r--runtime/service/npud/CMakeLists.txt21
-rw-r--r--runtime/service/npud/core/Server.cc65
-rw-r--r--runtime/service/npud/core/Server.h55
-rw-r--r--runtime/service/npud/core/Signal.cc56
-rw-r--r--runtime/service/npud/core/Signal.h37
-rw-r--r--runtime/service/npud/core/main.cc40
-rw-r--r--runtime/service/npud/util/Config.lst22
-rw-r--r--runtime/service/npud/util/ConfigSource.cc126
-rw-r--r--runtime/service/npud/util/ConfigSource.h51
-rw-r--r--runtime/service/npud/util/Logging.h88
-rw-r--r--tests/nnapi/CMakeLists.txt5
-rw-r--r--tests/nnfw_api/src/CircleGen.cc28
-rw-r--r--tests/nnfw_api/src/CircleGen.h4
-rw-r--r--tests/nnfw_api/src/GenModelTest.h23
-rw-r--r--tests/nnfw_api/src/GenModelTests.test.cc (renamed from tests/nnfw_api/src/GenModelTests.cc)0
-rw-r--r--tests/nnfw_api/src/ModelTestDynamicTensor.test.cc (renamed from tests/nnfw_api/src/ModelTestDynamicTensor.cc)0
-rw-r--r--tests/nnfw_api/src/ModelTestInputReshaping.test.cc (renamed from tests/nnfw_api/src/ModelTestInputReshaping.cc)0
-rw-r--r--tests/nnfw_api/src/RegressionTests.test.cc (renamed from tests/nnfw_api/src/RegressionTests.cc)0
-rw-r--r--tests/nnfw_api/src/ValidationTestAddModelLoaded.test.cc (renamed from tests/nnfw_api/src/ValidationTestAddModelLoaded.cc)0
-rw-r--r--tests/nnfw_api/src/ValidationTestAddSessionPrepared.test.cc (renamed from tests/nnfw_api/src/ValidationTestAddSessionPrepared.cc)0
-rw-r--r--tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.test.cc (renamed from tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.cc)0
-rw-r--r--tests/nnfw_api/src/ValidationTestMultipleSessions.test.cc (renamed from tests/nnfw_api/src/ValidationTestMultipleSessions.cc)0
-rw-r--r--tests/nnfw_api/src/ValidationTestPipelineSession.test.cc (renamed from tests/nnfw_api/src/ValidationTestPipelineSession.cc)0
-rw-r--r--tests/nnfw_api/src/ValidationTestSessionCreated.test.cc (renamed from tests/nnfw_api/src/ValidationTestSessionCreated.cc)0
-rw-r--r--tests/nnfw_api/src/ValidationTestSingleSession.test.cc (renamed from tests/nnfw_api/src/ValidationTestSingleSession.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Add.cc301
-rw-r--r--tests/nnfw_api/src/one_op_tests/Add.test.cc301
-rw-r--r--tests/nnfw_api/src/one_op_tests/AddN.test.cc (renamed from tests/nnfw_api/src/one_op_tests/AddN.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/ArgMinMax.cc256
-rw-r--r--tests/nnfw_api/src/one_op_tests/ArgMinMax.test.cc256
-rw-r--r--tests/nnfw_api/src/one_op_tests/AveragePool2D.cc243
-rw-r--r--tests/nnfw_api/src/one_op_tests/AveragePool2D.test.cc243
-rw-r--r--tests/nnfw_api/src/one_op_tests/BatchToSpaceND.test.cc (renamed from tests/nnfw_api/src/one_op_tests/BatchToSpaceND.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Cast.cc173
-rw-r--r--tests/nnfw_api/src/one_op_tests/Cast.test.cc201
-rw-r--r--tests/nnfw_api/src/one_op_tests/Concat.cc244
-rw-r--r--tests/nnfw_api/src/one_op_tests/Concat.test.cc244
-rw-r--r--tests/nnfw_api/src/one_op_tests/Conv2D.cc248
-rw-r--r--tests/nnfw_api/src/one_op_tests/Conv2D.test.cc278
-rw-r--r--tests/nnfw_api/src/one_op_tests/Cos.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Cos.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/DepthToSpace.cc89
-rw-r--r--tests/nnfw_api/src/one_op_tests/DepthToSpace.test.cc89
-rw-r--r--tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc457
-rw-r--r--tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.test.cc502
-rw-r--r--tests/nnfw_api/src/one_op_tests/DetectionPostProcess.test.cc (renamed from tests/nnfw_api/src/one_op_tests/DetectionPostProcess.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Elu.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Elu.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Equal.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Equal.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/ExpandDims.test.cc (renamed from tests/nnfw_api/src/one_op_tests/ExpandDims.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Fill.cc148
-rw-r--r--tests/nnfw_api/src/one_op_tests/Fill.test.cc148
-rw-r--r--tests/nnfw_api/src/one_op_tests/Floor.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Floor.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/FloorDiv.test.cc (renamed from tests/nnfw_api/src/one_op_tests/FloorDiv.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/FullyConnected.test.cc (renamed from tests/nnfw_api/src/one_op_tests/FullyConnected.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Greater.test.cc144
-rw-r--r--tests/nnfw_api/src/one_op_tests/GreaterEqual.test.cc145
-rw-r--r--tests/nnfw_api/src/one_op_tests/If.cc132
-rw-r--r--tests/nnfw_api/src/one_op_tests/If.test.cc132
-rw-r--r--tests/nnfw_api/src/one_op_tests/InstanceNorm.test.cc (renamed from tests/nnfw_api/src/one_op_tests/InstanceNorm.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/L2Normalization.test.cc (renamed from tests/nnfw_api/src/one_op_tests/L2Normalization.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/LeakyRelu.test.cc (renamed from tests/nnfw_api/src/one_op_tests/LeakyRelu.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Less.test.cc143
-rw-r--r--tests/nnfw_api/src/one_op_tests/LessEqual.test.cc144
-rw-r--r--tests/nnfw_api/src/one_op_tests/LogSoftmax.test.cc (renamed from tests/nnfw_api/src/one_op_tests/LogSoftmax.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Mean.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Mean.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Mul.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Mul.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Neg.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Neg.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/NotEqual.test.cc158
-rw-r--r--tests/nnfw_api/src/one_op_tests/OneHot.test.cc (renamed from tests/nnfw_api/src/one_op_tests/OneHot.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Pad.cc172
-rw-r--r--tests/nnfw_api/src/one_op_tests/Pad.test.cc172
-rw-r--r--tests/nnfw_api/src/one_op_tests/PadV2.test.cc (renamed from tests/nnfw_api/src/one_op_tests/PadV2.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Quantize.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Quantize.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Rank.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Rank.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Reduce.cc70
-rw-r--r--tests/nnfw_api/src/one_op_tests/Reduce.test.cc70
-rw-r--r--tests/nnfw_api/src/one_op_tests/Relu.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Relu.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Relu6.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Relu6.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/ResizeBilinear.cc101
-rw-r--r--tests/nnfw_api/src/one_op_tests/ResizeBilinear.test.cc101
-rw-r--r--tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.test.cc (renamed from tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Reverse.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Reverse.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Select.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Select.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Shape.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Shape.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Slice.cc187
-rw-r--r--tests/nnfw_api/src/one_op_tests/Slice.test.cc187
-rw-r--r--tests/nnfw_api/src/one_op_tests/Softmax.cc130
-rw-r--r--tests/nnfw_api/src/one_op_tests/Softmax.test.cc130
-rw-r--r--tests/nnfw_api/src/one_op_tests/Split.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Split.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Sqrt.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Sqrt.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Square.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Square.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/StridedSlice.test.cc (renamed from tests/nnfw_api/src/one_op_tests/StridedSlice.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Sub.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Sub.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Tile.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Tile.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/Transpose.test.cc (renamed from tests/nnfw_api/src/one_op_tests/Transpose.cc)0
-rw-r--r--tests/nnfw_api/src/one_op_tests/While.cc270
-rw-r--r--tests/nnfw_api/src/one_op_tests/While.test.cc270
-rw-r--r--tests/scripts/command/nnpkg-test11
-rw-r--r--tests/scripts/command/prepare-model12
-rw-r--r--tests/tools/nnpackage_run/src/nnpackage_run.cc6
-rw-r--r--tests/tools/nnpackage_run/src/rawformatter.cc26
-rw-r--r--tests/tools/tflite_vanilla_run/CMakeLists.txt11
-rw-r--r--tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc7
-rw-r--r--tools/cross/arm/sources.list.jammy11
-rw-r--r--tools/cross/arm/sources.list.xenial11
-rwxr-xr-xtools/cross/install_rootfs.sh11
-rwxr-xr-xtools/nnpackage_tool/gen_golden/gen_golden.py4
-rwxr-xr-xtools/nnpackage_tool/model2nnpkg/model2nnpkg.sh137
2095 files changed, 77633 insertions, 12831 deletions
diff --git a/.ahub/sam/exclude.txt b/.ahub/sam/exclude.txt
index c9ba5e084..f16f84f3c 100644
--- a/.ahub/sam/exclude.txt
+++ b/.ahub/sam/exclude.txt
@@ -5,6 +5,22 @@
# Eigen
/ONE/compiler/nnc/backends/soft_backend/code_snippets/eigen.def
+# Frontend test tools that are needed for release package build
+/ONE/compiler/circlechef
+/ONE/compiler/circle-verify
+/ONE/compiler/luci/tester
+
+# Exclude IR headers which have lots of similar patterns
+# TODO remove this when refactoring is possible
+/ONE/compiler/luci/lang/include/luci/IR/Nodes
+/ONE/compiler/luci/import/include/luci/Import/Nodes
+/ONE/compiler/loco/include/loco/IR
+/ONE/compiler/tflchef/tflite/src/Op/include
+
+# Exclude interpreter kernels which have similar patterns
+/ONE/compiler/luci-interpreter/src/kernels
+/ONE/compiler/locomotiv/src/Node
+
# Test codes
/ONE/tests
diff --git a/.ahub/tcchecker-tca/config.yaml b/.ahub/tcchecker-tca/config.yaml
index 95e11d0f9..73ec5489f 100644
--- a/.ahub/tcchecker-tca/config.yaml
+++ b/.ahub/tcchecker-tca/config.yaml
@@ -4,30 +4,23 @@ test:
testCaseLanguage: CPP
testFW: GTEST
testCaseFolder:
- - /compute/test/cker
- - /runtime/onert/core/src/backend/basic
- - /runtime/onert/frontend/nnapi
- - /runtime/onert/test/core/compiler
- - /runtime/onert/test/core/exec
- - /runtime/onert/test/core/interp
- - /runtime/onert/test/graph
- - /runtime/onert/test/graph/operand
- - /runtime/onert/test/graph/operation
- - /runtime/onert/test/graph/verifier
- - /runtime/onert/test/ir
- - /runtime/onert/test/util
- - /tests/nnfw_api/src
+ - /compute/cker
+ - /runtime/libs/misc
+ - /runtime/libs/ndarray
+ - /runtime/onert
+ - /tests/nnfw_api
testFile:
- - extension: cpp
+ - extension: test.cpp
any: true
- - extension: cc
+ - extension: test.cc
any: true
testCase:
- condition:
- functionName:
starts:
- TEST
+ - TYPED_TEST
- excludes :
- Verifier.dag_checker
- graph_operand_LayoutSet.layout_set_operators
diff --git a/.github/workflows/check-pr-commit.yml b/.github/workflows/check-pr-commit.yml
index 38c76dc18..a3f4c1c92 100644
--- a/.github/workflows/check-pr-commit.yml
+++ b/.github/workflows/check-pr-commit.yml
@@ -5,6 +5,11 @@ on:
branches:
- master
- release/*
+ types:
+ - opened
+ - synchronize
+ - reopened
+ - ready_for_review
defaults:
run:
@@ -14,6 +19,8 @@ jobs:
check-commit-message:
name: Check commit message
runs-on: ubuntu-20.04
+ # Skip on draft, check on draft -> ready
+ if: github.event.pull_request.draft == false
steps:
- name: Checkout
diff --git a/compiler/arser/include/arser/arser.h b/compiler/arser/include/arser/arser.h
index 1703e421e..43f99dc5e 100644
--- a/compiler/arser/include/arser/arser.h
+++ b/compiler/arser/include/arser/arser.h
@@ -303,7 +303,7 @@ private:
std::string _long_name;
std::string _short_name;
std::vector<std::string> _names;
- std::string _type;
+ std::string _type = "string";
std::string _help_message;
std::function<void(void)> _func;
uint32_t _nargs{1};
@@ -540,16 +540,20 @@ public:
/*
** print usage
*/
+ auto print_usage_arg = [&](const arser::Argument &arg) {
+ stream << " ";
+ std::string arg_name = arser::internal::remove_dash(arg._long_name);
+ std::for_each(arg_name.begin(), arg_name.end(),
+ [&stream](const char &c) { stream << static_cast<char>(::toupper(c)); });
+ };
stream << "Usage: ./" << parser._program_name << " ";
// required optional argument
for (const auto &arg : parser._optional_arg_vec)
{
if (!arg._is_required)
continue;
- stream << arg._short_name << " ";
- std::string arg_name = arser::internal::remove_dash(arg._long_name);
- std::for_each(arg_name.begin(), arg_name.end(),
- [&stream](const char &c) { stream << static_cast<char>(::toupper(c)); });
+ stream << arg._short_name;
+ print_usage_arg(arg);
stream << " ";
}
// rest of the optional argument
@@ -560,10 +564,7 @@ public:
stream << "[" << arg._short_name;
if (arg._nargs)
{
- stream << " ";
- std::string arg_name = arser::internal::remove_dash(arg._long_name);
- std::for_each(arg_name.begin(), arg_name.end(),
- [&stream](const char &c) { stream << static_cast<char>(::toupper(c)); });
+ print_usage_arg(arg);
}
stream << "]"
<< " ";
@@ -591,39 +592,28 @@ public:
}
const size_t message_width = 60;
- // positional argument
- if (!parser._positional_arg_vec.empty())
- {
- stream << "[Positional argument]" << std::endl;
- for (const auto &arg : parser._positional_arg_vec)
+ auto print_help_args = [&](const std::list<Argument> &args, const std::string &title) {
+ if (!args.empty())
{
- stream.width(length_of_longest_arg);
- stream << std::left << arser::internal::make_comma_concatenated(arg._names) << "\t";
- for (size_t i = 0; i < arg._help_message.length(); i += message_width)
+ stream << title << std::endl;
+ for (const auto &arg : args)
{
- if (i)
- stream << std::string(length_of_longest_arg, ' ') << "\t";
- stream << arg._help_message.substr(i, message_width) << std::endl;
+ stream.width(length_of_longest_arg);
+ stream << std::left << arser::internal::make_comma_concatenated(arg._names) << "\t";
+ for (size_t i = 0; i < arg._help_message.length(); i += message_width)
+ {
+ if (i)
+ stream << std::string(length_of_longest_arg, ' ') << "\t";
+ stream << arg._help_message.substr(i, message_width) << std::endl;
+ }
}
+ std::cout << std::endl;
}
- std::cout << std::endl;
- }
+ };
+ // positional argument
+ print_help_args(parser._positional_arg_vec, "[Positional argument]");
// optional argument
- if (!parser._optional_arg_vec.empty())
- {
- stream << "[Optional argument]" << std::endl;
- for (const auto &arg : parser._optional_arg_vec)
- {
- stream.width(length_of_longest_arg);
- stream << std::left << arser::internal::make_comma_concatenated(arg._names) << "\t";
- for (size_t i = 0; i < arg._help_message.length(); i += message_width)
- {
- if (i)
- stream << std::string(length_of_longest_arg, ' ') << "\t";
- stream << arg._help_message.substr(i, message_width) << std::endl;
- }
- }
- }
+ print_help_args(parser._optional_arg_vec, "[Optional argument]");
return stream;
}
@@ -737,6 +727,29 @@ template <typename T> T Arser::get(const std::string &arg_name)
return get_impl(arg_name, static_cast<T *>(nullptr));
}
+class Helper
+{
+public:
+ static void add_version(Arser &arser, const std::function<void(void)> &func)
+ {
+ arser.add_argument("--version")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("Show version information and exit")
+ .exit_with(func);
+ }
+
+ static void add_verbose(Arser &arser)
+ {
+ arser.add_argument("-V", "--verbose")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("output additional information to stdout or stderr");
+ }
+};
+
} // namespace arser
#endif // __ARSER_H__
diff --git a/compiler/circle-eval-diff/CMakeLists.txt b/compiler/circle-eval-diff/CMakeLists.txt
index 4d86f8097..d5a62301c 100644
--- a/compiler/circle-eval-diff/CMakeLists.txt
+++ b/compiler/circle-eval-diff/CMakeLists.txt
@@ -6,6 +6,7 @@ list(REMOVE_ITEM SOURCES ${TESTS})
add_executable(circle-eval-diff ${DRIVER} ${SOURCES})
target_include_directories(circle-eval-diff PRIVATE include)
+target_include_directories(circle-eval-diff PRIVATE src)
target_link_libraries(circle-eval-diff arser)
target_link_libraries(circle-eval-diff safemain)
@@ -17,6 +18,8 @@ target_link_libraries(circle-eval-diff luci_interpreter)
target_link_libraries(circle-eval-diff dio_hdf5)
target_link_libraries(circle-eval-diff vconone)
+install(TARGETS circle-eval-diff DESTINATION bin)
+
if(NOT ENABLE_TEST)
return()
endif(NOT ENABLE_TEST)
@@ -25,10 +28,15 @@ endif(NOT ENABLE_TEST)
# Instead, we use TEST_SOURCES to specify sources used for tests.
set(TEST_SOURCES
"src/MetricPrinter.cpp"
- "src/Tensor.cpp")
+ "src/Tensor.cpp"
+ "src/InputDataLoader.cpp")
nnas_find_package(GTest REQUIRED)
GTest_AddTest(circle_eval_diff_test ${TESTS} ${TEST_SOURCES})
+target_include_directories(circle_eval_diff_test PRIVATE include)
target_include_directories(circle_eval_diff_test PRIVATE src)
target_link_libraries(circle_eval_diff_test luci_testhelper)
target_link_libraries(circle_eval_diff_test nncc_coverage)
+target_link_libraries(circle_eval_diff_test dio_hdf5)
+target_link_libraries(circle_eval_diff_test loco)
+target_link_libraries(circle_eval_diff_test luci_lang)
diff --git a/compiler/circle-eval-diff/driver/Driver.cpp b/compiler/circle-eval-diff/driver/Driver.cpp
index f4a12a403..7e63ec88c 100644
--- a/compiler/circle-eval-diff/driver/Driver.cpp
+++ b/compiler/circle-eval-diff/driver/Driver.cpp
@@ -30,19 +30,15 @@ std::string to_lower_case(std::string s)
return s;
}
-Metric to_metric(const std::string &str)
-{
- if (to_lower_case(str).compare("mae") == 0)
- return Metric::MAE;
-
- throw std::runtime_error("Unsupported metric.");
-}
-
InputFormat to_input_format(const std::string &str)
{
- if (to_lower_case(str).compare("h5") == 0)
+ auto small_str = to_lower_case(str);
+ if (small_str.compare("h5") == 0)
return InputFormat::H5;
+ if (small_str.compare("directory") == 0 || small_str.compare("dir") == 0)
+ return InputFormat::DIR;
+
throw std::runtime_error("Unsupported input format.");
}
@@ -58,50 +54,50 @@ int entry(const int argc, char **argv)
{
arser::Arser arser("Compare inference results of two circle models");
- arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
+ arser::Helper::add_version(arser, print_version);
- arser.add_argument("--first_model")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(true)
- .help("First input model filepath");
+ arser.add_argument("--first_model").required(true).help("First input model filepath");
- arser.add_argument("--second_model")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(true)
- .help("Second input model filepath");
+ arser.add_argument("--second_model").required(true).help("Second input model filepath");
arser.add_argument("--first_input_data")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
.help("Input data filepath for the first model. If not given, circle-eval-diff will run with "
"randomly generated data");
arser.add_argument("--second_input_data")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
.help("Input data filepath for the second model. If not given, circle-eval-diff will run with "
"randomly generated data");
- arser.add_argument("--metric")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .default_value("MAE")
- .help("Metric for comparison (default: MAE)");
+ arser.add_argument("--dump_output_with_prefix")
+ .help("Dump output to files. <prefix> should be given as an argument. "
+ "Outputs are saved in <prefix>.<data_index>.first.output<output_index> and "
+ "<prefix>.<data_index>.second.output<output_index>.");
+
+ arser.add_argument("--print_mae").nargs(0).default_value(false).help("Print Mean Absolute Error");
+
+ arser.add_argument("--print_mape")
+ .nargs(0)
+ .default_value(false)
+  .help("Print Mean Absolute Percentage Error");
+
+ arser.add_argument("--print_mpeir")
+ .nargs(0)
+ .default_value(false)
+ .help("Print Mean Peak Error to Interval Ratio");
+
+ arser.add_argument("--print_top1_match")
+ .nargs(0)
+ .default_value(false)
+ .help("Print Mean Top-1 Match Ratio");
+
+ arser.add_argument("--print_top5_match")
+ .nargs(0)
+ .default_value(false)
+ .help("Print Mean Top-5 Match Ratio");
+
+ arser.add_argument("--print_mse").nargs(0).default_value(false).help("Print Mean Squared Error");
arser.add_argument("--input_data_format")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
.default_value("h5")
.help("Input data format. h5/hdf5 (default) or directory");
@@ -124,6 +120,7 @@ int entry(const int argc, char **argv)
std::string second_input_data_path;
std::string metric;
std::string input_data_format;
+ std::string output_prefix;
if (arser["--first_input_data"])
first_input_data_path = arser.get<std::string>("--first_input_data");
@@ -135,22 +132,54 @@ int entry(const int argc, char **argv)
throw std::runtime_error("Input data path should be given for both first_model and "
"second_model, or neither must be given.");
- metric = arser.get<std::string>("--metric");
+ if (arser["--dump_output_with_prefix"])
+ output_prefix = arser.get<std::string>("--dump_output_with_prefix");
+
+ // Set Metrics
+ std::vector<Metric> metrics;
+ if (arser["--print_mae"] and arser.get<bool>("--print_mae"))
+ {
+ metrics.emplace_back(Metric::MAE);
+ }
+ if (arser["--print_mape"] and arser.get<bool>("--print_mape"))
+ {
+ metrics.emplace_back(Metric::MAPE);
+ }
+ if (arser["--print_mpeir"] and arser.get<bool>("--print_mpeir"))
+ {
+ metrics.emplace_back(Metric::MPEIR);
+ }
+ if (arser["--print_top1_match"] and arser.get<bool>("--print_top1_match"))
+ {
+ metrics.emplace_back(Metric::MTOP1);
+ }
+ if (arser["--print_top5_match"] and arser.get<bool>("--print_top5_match"))
+ {
+ metrics.emplace_back(Metric::MTOP5);
+ }
+ if (arser["--print_mse"] and arser.get<bool>("--print_mse"))
+ {
+ metrics.emplace_back(Metric::MSE);
+ }
+
input_data_format = arser.get<std::string>("--input_data_format");
auto ctx = std::make_unique<CircleEvalDiff::Context>();
{
ctx->first_model_path = first_model_path;
ctx->second_model_path = second_model_path;
- ctx->metric = to_metric(metric);
+ ctx->first_input_data_path = first_input_data_path;
+ ctx->second_input_data_path = second_input_data_path;
+ ctx->metric = metrics;
ctx->input_format = to_input_format(input_data_format);
+ ctx->output_prefix = output_prefix;
}
CircleEvalDiff ced(std::move(ctx));
ced.init();
- ced.evalDiff(first_input_data_path, second_input_data_path);
+ ced.evalDiff();
return EXIT_SUCCESS;
}
diff --git a/compiler/circle-eval-diff/include/CircleEvalDiff.h b/compiler/circle-eval-diff/include/CircleEvalDiff.h
index bf6aff46d..7894480ac 100644
--- a/compiler/circle-eval-diff/include/CircleEvalDiff.h
+++ b/compiler/circle-eval-diff/include/CircleEvalDiff.h
@@ -20,8 +20,12 @@
#include <luci/IR/Module.h>
#include <luci_interpreter/Interpreter.h>
+#include "InputDataLoader.h"
+#include "MetricPrinter.h"
+
#include <string>
#include <memory>
+#include <vector>
namespace circle_eval_diff
{
@@ -32,14 +36,12 @@ class ModuleEvalDiff;
enum class Metric
{
Undefined, // For debugging
- MAE,
-};
-
-enum class InputFormat
-{
- Undefined, // For debugging
- H5,
- // TODO Implement Random, Directory
+ MAE, // Mean Absolute Error
+ MAPE, // Mean Percentage Absolute Error
+ MPEIR, // Mean Peak Error to Interval Ratio
+ MTOP1, // Mean Top-1 Match Ratio
+ MTOP5, // Mean Top-5 Match Ratio
+ MSE, // Mean Squared Error
};
class CircleEvalDiff final
@@ -49,8 +51,11 @@ public:
{
std::string first_model_path;
std::string second_model_path;
- Metric metric = Metric::Undefined;
+ std::string first_input_data_path;
+ std::string second_input_data_path;
+ std::vector<Metric> metric;
InputFormat input_format = InputFormat::Undefined;
+ std::string output_prefix;
};
public:
@@ -61,12 +66,13 @@ public:
void init();
// Evaluate two circle models for the given input data and compare the results
- void evalDiff(const std::string &first_input_data_path,
- const std::string &second_input_data_path) const;
+ void evalDiff(void) const;
private:
std::unique_ptr<Context> _ctx;
- std::unique_ptr<ModuleEvalDiff> _runner;
+ std::unique_ptr<luci::Module> _first_module;
+ std::unique_ptr<luci::Module> _second_module;
+ std::vector<std::unique_ptr<MetricPrinter>> _metrics;
};
} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/CircleEvalDiff.cpp b/compiler/circle-eval-diff/src/CircleEvalDiff.cpp
index c39a11371..43e026bf6 100644
--- a/compiler/circle-eval-diff/src/CircleEvalDiff.cpp
+++ b/compiler/circle-eval-diff/src/CircleEvalDiff.cpp
@@ -15,8 +15,9 @@
*/
#include "CircleEvalDiff.h"
-#include "ModuleEvalDiff.h"
+#include "InputDataLoader.h"
#include "MetricPrinter.h"
+#include "Tensor.h"
#include <foder/FileLoader.h>
#include <luci/Importer.h>
@@ -26,6 +27,25 @@
namespace
{
+// Return true iff two nodes have the same rank and identical dimensions.
+bool same_shape(const luci::CircleNode *a, const luci::CircleNode *b)
+{
+  if (a->rank() != b->rank())
+    return false;
+
+  for (uint32_t i = 0; i < a->rank(); i++)
+  {
+    // loco::Dimension provides operator==, not operator!=, hence the negation
+    if (not(a->dim(i) == b->dim(i)))
+      return false;
+  }
+
+  return true;
+}
+
+// Return true iff two nodes have the same data type.
+bool same_dtype(const luci::CircleNode *a, const luci::CircleNode *b)
+{
+  return a->dtype() == b->dtype();
+}
+
std::unique_ptr<luci::Module> import(const std::string &model_path)
{
// Load model from the file
@@ -40,7 +60,12 @@ std::unique_ptr<luci::Module> import(const std::string &model_path)
throw std::runtime_error("Failed to verify circle '" + model_path + "'");
}
- auto module = luci::Importer().importModule(circle::GetModel(model_data.data()));
+ auto circle_model = circle::GetModel(model_data.data());
+
+ if (not circle_model)
+ throw std::runtime_error("Failed to load '" + model_path + "'");
+
+ auto module = luci::Importer().importModule(circle_model);
if (not module)
throw std::runtime_error("Failed to load '" + model_path + "'");
@@ -48,50 +73,192 @@ std::unique_ptr<luci::Module> import(const std::string &model_path)
return module;
}
+// Convenience accessor for a module's graph input nodes.
+const std::vector<loco::Node *> inputs_of(const luci::Module *module)
+{
+  return loco::input_nodes(module->graph());
+}
+
+// Convenience accessor for a module's graph output nodes.
+const std::vector<loco::Node *> outputs_of(const luci::Module *module)
+{
+  return loco::output_nodes(module->graph());
+}
+
+// Write 'data_size' bytes from 'data' into a new binary file at 'filename'.
+// Throws std::runtime_error when the file cannot be opened or written.
+void writeDataToFile(const std::string &filename, const char *data, size_t data_size)
+{
+  std::ofstream fs(filename, std::ofstream::binary);
+  if (fs.fail())
+    throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+  if (fs.write(data, data_size).fail())
+  {
+    throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n");
+  }
+}
+
+// Verify that two modules agree on output count and on the shape and dtype of
+// each corresponding output. Throws std::runtime_error on any mismatch.
+void checkOutputs(const luci::Module *first, const luci::Module *second)
+{
+  const auto first_output = outputs_of(first);
+  const auto second_output = outputs_of(second);
+
+  if (first_output.size() != second_output.size())
+    throw std::runtime_error("Models have different output counts");
+
+  for (uint32_t i = 0; i < first_output.size(); i++)
+  {
+    const auto first_node = loco::must_cast<luci::CircleNode *>(first_output[i]);
+    const auto second_node = loco::must_cast<luci::CircleNode *>(second_output[i]);
+
+    if (not same_shape(first_node, second_node))
+      throw std::runtime_error("Output shape mismatch (" + first_node->name() + ", " +
+                               second_node->name() + ")");
+
+    if (not same_dtype(first_node, second_node))
+      throw std::runtime_error("Output dtype mismatch (" + first_node->name() + ", " +
+                               second_node->name() + ")");
+  }
+}
+
} // namespace
namespace circle_eval_diff
{
-CircleEvalDiff::CircleEvalDiff(std::unique_ptr<Context> &&ctx)
- : _ctx(std::move(ctx)), _runner(nullptr)
+// Run 'module' on one dataset ('data' holds one Tensor per model input) and
+// return the output tensors in graph output order.
+std::vector<std::shared_ptr<Tensor>> interpret(const luci::Module *module,
+                                               const InputDataLoader::Data &data)
+{
+  auto interpreter = std::make_unique<luci_interpreter::Interpreter>(module);
+
+  auto input_nodes = ::inputs_of(module);
+  auto output_nodes = ::outputs_of(module);
+
+  // Feed each tensor of 'data' to the input node with the matching index
+  for (uint32_t input_idx = 0; input_idx < data.size(); input_idx++)
+  {
+    auto input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+    assert(input_node->index() == input_idx);
+
+    auto input_data = data.at(input_idx);
+    interpreter->writeInputTensor(input_node, input_data.buffer(), input_data.byte_size());
+  }
+
+  interpreter->interpret();
+
+  // Copy every graph output into a freshly allocated Tensor
+  std::vector<std::shared_ptr<Tensor>> outputs;
+  for (uint32_t output_idx = 0; output_idx < output_nodes.size(); output_idx++)
+  {
+    auto output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[output_idx]);
+    assert(output_node->index() == output_idx);
+
+    auto tensor = createEmptyTensor(output_node);
+    interpreter->readOutputTensor(output_node, tensor->buffer(), tensor->byte_size());
+    outputs.emplace_back(tensor);
+  }
+
+  return outputs;
+}
+
+CircleEvalDiff::CircleEvalDiff(std::unique_ptr<Context> &&ctx) : _ctx(std::move(ctx))
{
+ // DO NOTHING
}
CircleEvalDiff::~CircleEvalDiff() = default;
void CircleEvalDiff::init()
{
+ _first_module = import(_ctx->first_model_path);
+ _second_module = import(_ctx->second_model_path);
+
+ // Check modules have the same output signature (dtype/shape)
+ // Exception will be thrown if they have different signature
+ checkOutputs(_first_module.get(), _second_module.get());
+
// Set metric
std::unique_ptr<MetricPrinter> metric;
- switch (_ctx->metric)
+ for (auto metric : _ctx->metric)
{
- case Metric::MAE:
- metric = std::make_unique<MAEPrinter>();
- break;
- default:
- throw std::runtime_error("Unsupported metric.");
+ switch (metric)
+ {
+ case Metric::MAE:
+ {
+ _metrics.emplace_back(std::make_unique<MAEPrinter>());
+ break;
+ }
+ case Metric::MAPE:
+ {
+ _metrics.emplace_back(std::make_unique<MAPEPrinter>());
+ break;
+ }
+ case Metric::MPEIR:
+ {
+ _metrics.emplace_back(std::make_unique<MPEIRPrinter>());
+ break;
+ }
+ case Metric::MTOP1:
+ {
+ _metrics.emplace_back(std::make_unique<TopKMatchPrinter>(1));
+ break;
+ }
+ case Metric::MTOP5:
+ {
+ _metrics.emplace_back(std::make_unique<TopKMatchPrinter>(5));
+ break;
+ }
+ case Metric::MSE:
+ {
+ _metrics.emplace_back(std::make_unique<MSEPrinter>());
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported metric.");
+ }
+ _metrics.back()->init(_first_module.get(), _second_module.get());
}
+}
- auto first_module = import(_ctx->first_model_path);
- auto second_module = import(_ctx->second_model_path);
+void CircleEvalDiff::evalDiff(void) const
+{
+ auto first_input_loader = circle_eval_diff::makeDataLoader(
+ _ctx->first_input_data_path, _ctx->input_format, ::inputs_of(_first_module.get()));
+ auto second_input_loader = circle_eval_diff::makeDataLoader(
+ _ctx->second_input_data_path, _ctx->input_format, ::inputs_of(_second_module.get()));
- // Set runner
- switch (_ctx->input_format)
+ for (uint32_t data_idx = 0; data_idx < first_input_loader->size(); data_idx++)
{
- case InputFormat::H5:
- _runner = std::make_unique<H5InputEvalDiff>(std::move(first_module), std::move(second_module),
- std::move(metric));
- break;
- default:
- throw std::runtime_error("Unsupported input format.");
+ std::cout << "Evaluating " << data_idx << "'th data" << std::endl;
+
+ auto first_data = first_input_loader->get(data_idx);
+ auto second_data = second_input_loader->get(data_idx);
+
+ auto first_output = interpret(_first_module.get(), first_data);
+ auto second_output = interpret(_second_module.get(), second_data);
+
+ for (auto &metric : _metrics)
+ {
+ metric->accumulate(first_output, second_output);
+ }
+
+ if (_ctx.get()->output_prefix.empty())
+ continue;
+
+ for (uint32_t i = 0; i < first_output.size(); i++)
+ {
+ auto out = first_output[i];
+ writeDataToFile(_ctx.get()->output_prefix + "." + std::to_string(data_idx) + ".first.output" +
+ std::to_string(i),
+ (char *)(out->buffer()), out->byte_size());
+ }
+ for (uint32_t i = 0; i < second_output.size(); i++)
+ {
+ auto out = second_output[i];
+ writeDataToFile(_ctx.get()->output_prefix + "." + std::to_string(data_idx) +
+ ".second.output" + std::to_string(i),
+ (char *)(out->buffer()), out->byte_size());
+ }
}
-}
-void CircleEvalDiff::evalDiff(const std::string &first_input_data_path,
- const std::string &second_input_data_path) const
-{
- _runner->evalDiff(first_input_data_path, second_input_data_path);
+ for (auto &metric : _metrics)
+ {
+ std::cout << metric.get() << std::endl;
+ }
}
} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/InputDataLoader.cpp b/compiler/circle-eval-diff/src/InputDataLoader.cpp
new file mode 100644
index 000000000..99276f32a
--- /dev/null
+++ b/compiler/circle-eval-diff/src/InputDataLoader.cpp
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "InputDataLoader.h"
+
+#include <dio_hdf5/HDF5Importer.h>
+#include <loco/IR/Graph.h>
+#include <luci/IR/CircleNodes.h>
+
+#include <cstring>
+#include <dirent.h>
+#include <fstream>
+#include <vector>
+
+using DataType = loco::DataType;
+using Shape = std::vector<loco::Dimension>;
+
+namespace circle_eval_diff
+{
+
+// Check the type and the shape of CircleInput
+// Throws std::runtime_error if 'dtype' or 'shape' does not match 'input_node'.
+void verifyTypeShape(const luci::CircleInput *input_node, const DataType &dtype, const Shape &shape)
+{
+  // Type check
+  if (dtype != input_node->dtype())
+    throw std::runtime_error("Wrong input type.");
+
+  if (shape.size() != input_node->rank())
+    throw std::runtime_error("Input rank mismatch.");
+
+  for (uint32_t i = 0; i < shape.size(); i++)
+  {
+    // loco::Dimension has no operator!=, hence the negated equality
+    if (not(shape.at(i) == input_node->dim(i)))
+      throw std::runtime_error("Input shape mismatch.");
+  }
+}
+
+// Return the byte size of each given input node, in order.
+// Used by DirectoryLoader to slice one raw data file into per-input buffers.
+std::vector<size_t> getEachByteSizeOf(const std::vector<loco::Node *> &nodes)
+{
+  std::vector<size_t> vec;
+
+  for (const auto node : nodes)
+  {
+    const auto input_node = loco::must_cast<const luci::CircleInput *>(node);
+    // Seed with the byte size of one element so the result is in bytes,
+    // consistent with getTotalByteSizeOf. (Seeding with 1 returned the
+    // element count, which under-copies inputs with multi-byte dtypes.)
+    size_t byte_size = loco::size(input_node->dtype());
+
+    for (uint32_t index = 0; index < input_node->rank(); index++)
+    {
+      byte_size *= input_node->dim(index).value();
+    }
+
+    vec.push_back(byte_size);
+  }
+
+  return vec;
+}
+
+// Return the summed byte size of all given input nodes
+// (element count times the byte size of each node's dtype).
+size_t getTotalByteSizeOf(const std::vector<loco::Node *> &nodes)
+{
+  size_t total_byte_size = 0;
+
+  for (const auto node : nodes)
+  {
+    const auto input_node = loco::must_cast<const luci::CircleInput *>(node);
+    size_t byte_size = loco::size(input_node->dtype());
+
+    for (uint32_t index = 0; index < input_node->rank(); index++)
+    {
+      byte_size *= input_node->dim(index).value();
+    }
+
+    total_byte_size += byte_size;
+  }
+
+  return total_byte_size;
+}
+
+} // namespace circle_eval_diff
+
+namespace circle_eval_diff
+{
+
+// Open the HDF5 file at 'file_path' and select its "value" group, which holds
+// the input datasets. HDF5 failures are rethrown as std::runtime_error after
+// printing the HDF5 error stack.
+HDF5Loader::HDF5Loader(const std::string &file_path, const std::vector<loco::Node *> &input_nodes)
+  : _input_nodes{input_nodes}
+{
+  try
+  {
+    using HDF5Importer = dio::hdf5::HDF5Importer;
+
+    _hdf5 = std::make_unique<HDF5Importer>(file_path);
+    _hdf5->importGroup("value");
+  }
+  catch (const H5::Exception &e)
+  {
+    H5::Exception::printErrorStack();
+    throw std::runtime_error("HDF5 error occurred.");
+  }
+}
+
+// Number of datasets stored in the HDF5 file.
+uint32_t HDF5Loader::size(void) const { return _hdf5->numData(); }
+
+// Load the 'data_idx'-th dataset as one Tensor per model input.
+// For non-raw data, the stored dtype/shape is validated against the model.
+InputDataLoader::Data HDF5Loader::get(uint32_t data_idx) const
+{
+  Data data;
+  data.resize(_input_nodes.size());
+
+  for (uint32_t input_idx = 0; input_idx < _input_nodes.size(); input_idx++)
+  {
+    auto input_node = loco::must_cast<luci::CircleInput *>(_input_nodes.at(input_idx));
+    assert(input_node->index() == input_idx);
+
+    // Allocate the destination tensor sized after the model input
+    data.at(input_idx) = *createEmptyTensor(input_node).get();
+
+    auto input_buffer = data.at(input_idx).buffer();
+    try
+    {
+      if (_hdf5->isRawData())
+      {
+        // Raw data carries no type/shape metadata to verify
+        _hdf5->readTensor(data_idx, input_idx, input_buffer);
+      }
+      else
+      {
+        DataType dtype;
+        Shape shape;
+        _hdf5->readTensor(data_idx, input_idx, &dtype, &shape, input_buffer);
+
+        // Check the type and the shape of the input data is valid
+        verifyTypeShape(input_node, dtype, shape);
+      }
+    }
+    catch (const H5::Exception &e)
+    {
+      H5::Exception::printErrorStack();
+      throw std::runtime_error("HDF5 error occurred.");
+    }
+  }
+
+  return data;
+}
+
+// Collect the paths of all regular files under 'dir_path'; each file is one
+// raw input dataset. Throws std::runtime_error if the directory cannot be
+// opened.
+// NOTE readdir() returns entries in an unspecified order, so dataset indices
+// are not guaranteed to be stable across filesystems.
+DirectoryLoader::DirectoryLoader(const std::string &dir_path,
+                                 const std::vector<loco::Node *> &input_nodes)
+  : _input_nodes{input_nodes}
+{
+  DIR *dir = opendir(dir_path.c_str());
+  if (not dir)
+  {
+    throw std::runtime_error("Cannot open directory \"" + dir_path + "\".");
+  }
+
+  struct dirent *entry = nullptr;
+  // Extra parentheses make the assignment-as-condition explicit
+  while ((entry = readdir(dir)) != nullptr)
+  {
+    // Skip if the entry is not a regular file
+    if (entry->d_type != DT_REG)
+      continue;
+
+    _data_paths.push_back(dir_path + "/" + entry->d_name);
+  }
+
+  closedir(dir);
+}
+
+// Number of raw data files found in the directory.
+uint32_t DirectoryLoader::size(void) const { return _data_paths.size(); }
+
+// Read the 'data_idx'-th raw file in one shot, then split the buffer into one
+// Tensor per model input using the per-input sizes from getEachByteSizeOf.
+InputDataLoader::Data DirectoryLoader::get(uint32_t data_idx) const
+{
+  // Read raw data
+  const auto input_total_bytes = getTotalByteSizeOf(_input_nodes);
+  std::vector<char> input_data(input_total_bytes);
+  const auto raw_data_path = _data_paths.at(data_idx);
+  std::ifstream fs(raw_data_path, std::ifstream::binary);
+
+  if (fs.fail())
+  {
+    throw std::runtime_error("Cannot open file \"" + raw_data_path + "\".");
+  }
+  if (fs.read(input_data.data(), input_total_bytes).fail())
+  {
+    throw std::runtime_error("Failed to read raw data from file \"" + raw_data_path + "\".");
+  }
+
+  // Make Tensor from raw data
+  auto input_data_cur = input_data.data();
+
+  Data data;
+  data.resize(_input_nodes.size());
+  std::vector<size_t> input_bytes = getEachByteSizeOf(_input_nodes);
+  for (uint32_t index = 0; index < _input_nodes.size(); index++)
+  {
+    const auto input_node = loco::must_cast<const luci::CircleInput *>(_input_nodes.at(index));
+    auto &tensor = data.at(index);
+    tensor = *createEmptyTensor(input_node).get();
+    auto buffer = tensor.buffer();
+    // Copy this input's slice and advance the cursor to the next slice
+    std::memcpy(buffer, input_data_cur, input_bytes.at(index));
+    input_data_cur += input_bytes.at(index);
+  }
+
+  return data;
+}
+
+// Factory: build the InputDataLoader implementation matching 'format'
+// (H5 -> HDF5Loader, DIR -> DirectoryLoader). Throws on unsupported formats.
+std::unique_ptr<InputDataLoader> makeDataLoader(const std::string &file_path,
+                                                const InputFormat &format,
+                                                const std::vector<loco::Node *> &input_nodes)
+{
+  switch (format)
+  {
+    case InputFormat::H5:
+    {
+      return std::make_unique<HDF5Loader>(file_path, input_nodes);
+    }
+    case InputFormat::DIR:
+    {
+      return std::make_unique<DirectoryLoader>(file_path, input_nodes);
+    }
+    default:
+      throw std::runtime_error{"Unsupported input format."};
+  }
+}
+
+} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/InputDataLoader.h b/compiler/circle-eval-diff/src/InputDataLoader.h
new file mode 100644
index 000000000..14921b239
--- /dev/null
+++ b/compiler/circle-eval-diff/src/InputDataLoader.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EVAL_DIFF_INPUT_DATA_LOADER_H__
+#define __CIRCLE_EVAL_DIFF_INPUT_DATA_LOADER_H__
+
+#include <dio_hdf5/HDF5Importer.h>
+#include <loco/IR/Node.h>
+#include <luci/IR/CircleNodes.h>
+
+#include "Tensor.h"
+
+#include <memory>
+#include <string>
+
+namespace circle_eval_diff
+{
+
+void verifyTypeShape(const luci::CircleInput *input_node, const loco::DataType &dtype,
+ const std::vector<loco::Dimension> &shape);
+
+} // namespace circle_eval_diff
+
+namespace circle_eval_diff
+{
+
+// Supported sources of input data.
+enum class InputFormat
+{
+  Undefined, // For debugging
+  H5,        // single HDF5 file
+  DIR,       // directory of raw binary files
+  // TODO Implement Random
+};
+
+// Abstract source of input datasets. One Data is a full set of input tensors
+// for a single inference (one Tensor per model input).
+class InputDataLoader
+{
+public:
+  using Data = std::vector<Tensor>;
+
+public:
+  virtual ~InputDataLoader() = default;
+
+public:
+  // Number of datasets available
+  virtual uint32_t size(void) const = 0;
+
+public:
+  // Load the 'data_idx'-th dataset
+  virtual Data get(uint32_t data_idx) const = 0;
+};
+
+// Loads input datasets from a single HDF5 file (group "value").
+class HDF5Loader final : public InputDataLoader
+{
+public:
+  HDF5Loader(const std::string &file_path, const std::vector<loco::Node *> &input_nodes);
+
+public:
+  uint32_t size(void) const final;
+  Data get(uint32_t data_idx) const final;
+
+private:
+  const std::vector<loco::Node *> _input_nodes;
+  std::unique_ptr<dio::hdf5::HDF5Importer> _hdf5;
+};
+
+// This class loads the directory that has raw data binary files.
+class DirectoryLoader final : public InputDataLoader
+{
+public:
+  DirectoryLoader(const std::string &dir_path, const std::vector<loco::Node *> &input_nodes);
+
+public:
+  uint32_t size(void) const final;
+  Data get(uint32_t data_idx) const final;
+
+private:
+  const std::vector<loco::Node *> _input_nodes;
+  std::vector<std::string> _data_paths;
+};
+
+// Factory for the loader matching 'format' (H5 or DIR).
+std::unique_ptr<InputDataLoader> makeDataLoader(const std::string &file_path,
+                                                const InputFormat &format,
+                                                const std::vector<loco::Node *> &input_nodes);
+
+} // namespace circle_eval_diff
+
+#endif // __CIRCLE_EVAL_DIFF_INPUT_DATA_LOADER_H__
diff --git a/compiler/circle-eval-diff/src/InputDataLoader.test.cpp b/compiler/circle-eval-diff/src/InputDataLoader.test.cpp
new file mode 100644
index 000000000..cbe78797b
--- /dev/null
+++ b/compiler/circle-eval-diff/src/InputDataLoader.test.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <luci/IR/CircleNodes.h>
+
+#include "InputDataLoader.h"
+
+using namespace circle_eval_diff;
+
+// Positive case: matching dtype and shape must pass verification.
+TEST(CircleEvalInputDataLoaderTest, verifyTypeShapeTest)
+{
+  luci::CircleInput input;
+  input.dtype(loco::DataType::FLOAT32);
+  input.rank(4);
+  input.dim(0).set(1);
+  input.dim(1).set(3);
+  input.dim(2).set(3);
+  input.dim(3).set(2);
+
+  loco::DataType right_data_type{loco::DataType::FLOAT32};
+  std::vector<loco::Dimension> right_shape;
+  right_shape.emplace_back(1);
+  right_shape.emplace_back(3);
+  right_shape.emplace_back(3);
+  right_shape.emplace_back(2);
+
+  EXPECT_NO_THROW(verifyTypeShape(&input, right_data_type, right_shape));
+}
+
+// Negative case: a shape mismatch must throw, with either dtype.
+TEST(CircleEvalInputDataLoaderTest, verifyTypeShapeTest_NEG)
+{
+  luci::CircleInput input;
+  input.dtype(loco::DataType::FLOAT32);
+  input.rank(4);
+  input.dim(0).set(1);
+  input.dim(1).set(4);
+  input.dim(2).set(4);
+  input.dim(3).set(2);
+
+  loco::DataType right_data_type{loco::DataType::FLOAT32};
+  loco::DataType wrong_data_type{loco::DataType::FLOAT16};
+  std::vector<loco::Dimension> wrong_shape;
+  wrong_shape.emplace_back(1);
+  wrong_shape.emplace_back(3);
+  wrong_shape.emplace_back(3);
+  wrong_shape.emplace_back(2);
+
+  EXPECT_ANY_THROW(verifyTypeShape(&input, right_data_type, wrong_shape));
+  EXPECT_ANY_THROW(verifyTypeShape(&input, wrong_data_type, wrong_shape));
+}
diff --git a/compiler/circle-eval-diff/src/MetricPrinter.cpp b/compiler/circle-eval-diff/src/MetricPrinter.cpp
index d65eb9b63..ec8408471 100644
--- a/compiler/circle-eval-diff/src/MetricPrinter.cpp
+++ b/compiler/circle-eval-diff/src/MetricPrinter.cpp
@@ -18,6 +18,7 @@
#include <luci/IR/CircleNode.h>
+#include <limits>
#include <iostream>
#include <cassert>
@@ -30,6 +31,16 @@ using Tensor = circle_eval_diff::Tensor;
namespace
{
+// Total number of elements in a node's shape (product of all dimensions).
+uint32_t num_elems(const luci::CircleNode *node)
+{
+  uint32_t res = 1;
+
+  for (uint32_t i = 0; i < node->rank(); i++)
+    res *= node->dim(i).value();
+
+  return res;
+}
+
template <typename T> bool same_shape(const T a, const T b)
{
if (a->rank() != b->rank())
@@ -44,6 +55,8 @@ template <typename T> bool same_shape(const T a, const T b)
return true;
}
+template <typename T> bool same_dtype(const T a, const T b) { return a->dtype() == b->dtype(); }
+
template <loco::DataType DT> std::shared_ptr<Tensor> to_fp32(const std::shared_ptr<Tensor> &tensor)
{
assert(tensor->dtype() == DT); // FIX_CALLER_UNLESS
@@ -97,7 +110,6 @@ void MAEPrinter::init(const luci::Module *first, const luci::Module *second)
{
const auto first_node = loco::must_cast<luci::CircleNode *>(first_output[i]);
const auto second_node = loco::must_cast<luci::CircleNode *>(second_output[i]);
- assert(same_shape(first_node, second_node)); // FIX_CALLER_UNLESS
// Create tensors to store intermediate results
_intermediate.emplace_back();
@@ -180,6 +192,471 @@ void MAEPrinter::dump(std::ostream &os) const
}
}
+// TODO Remove duplicate codes with MAEPrinter
+// Allocate one zero-initialized fp32 accumulator tensor per model output and
+// record output names for later reporting.
+void MAPEPrinter::init(const luci::Module *first, const luci::Module *second)
+{
+  THROW_UNLESS(first != nullptr, "Invalid module.");
+  THROW_UNLESS(second != nullptr, "Invalid module.");
+
+  const auto first_output = loco::output_nodes(first->graph());
+  const auto second_output = loco::output_nodes(second->graph());
+
+  assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS
+
+  for (uint32_t i = 0; i < first_output.size(); i++)
+  {
+    const auto first_node = loco::must_cast<luci::CircleNode *>(first_output[i]);
+    const auto second_node = loco::must_cast<luci::CircleNode *>(second_output[i]);
+
+    // Create tensors to store intermediate results
+    _intermediate.emplace_back();
+    _intermediate.at(i).dtype(loco::DataType::FLOAT32);
+    // NOTE Use both first_node and second_node to avoid release build break
+    _intermediate.at(i).rank(first_node->rank());
+    uint32_t num_elems = 1;
+    for (uint32_t j = 0; j < second_node->rank(); j++)
+    {
+      _intermediate.at(i).dim(j) = second_node->dim(j);
+      num_elems *= second_node->dim(j).value();
+    }
+    _intermediate.at(i).size<loco::DataType::FLOAT32>(num_elems);
+
+    // Check the buffer is initilized with zero
+    for (uint32_t j = 0; j < num_elems; j++)
+      assert(_intermediate.at(i).at<loco::DataType::FLOAT32>(j) == 0.0);
+
+    // Save output names for logging
+    _output_names.emplace_back(first_node->name());
+  }
+}
+
+// Accumulate |(a - b) / a|
+// NOTE(review): there is no guard for a_val == 0 — the division yields
+// inf/NaN which then poisons the accumulated MAPE. Presumably the reference
+// outputs are assumed to be non-zero; confirm with callers.
+void MAPEPrinter::accum_mean_absolute_error(uint32_t output_idx, const std::shared_ptr<Tensor> &a,
+                                            const std::shared_ptr<Tensor> &b)
+{
+  assert(a->dtype() == loco::DataType::FLOAT32 and
+         b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS
+  assert(same_shape(a.get(), b.get()));          // FIX_CALLER_UNLESS
+  assert(output_idx < _intermediate.size());     // FIX_CALLER_UNLESS
+
+  for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++)
+  {
+    const auto a_val = a->at<loco::DataType::FLOAT32>(i);
+    const auto b_val = b->at<loco::DataType::FLOAT32>(i);
+    _intermediate.at(output_idx).at<loco::DataType::FLOAT32>(i) +=
+      std::abs((a_val - b_val) / a_val);
+  }
+}
+
+// Assumption
+// first: the result of fp32 model
+// second: the result of fake-quantized model
+// Accumulate the per-element absolute percentage error of one dataset and
+// bump the dataset counter used for averaging in dump().
+void MAPEPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+                             const std::vector<std::shared_ptr<Tensor>> &second)
+{
+  assert(first.size() == second.size());        // FIX_CALLER_UNLESS
+  assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS
+
+  for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+  {
+    const auto first_output = first[output_idx];
+    const auto second_output = second[output_idx];
+
+    // Cast data to fp32 and then compute absolute error
+    const auto fp32_first_output = fp32(first_output);
+    const auto fp32_second_output = fp32(second_output);
+
+    accum_mean_absolute_error(output_idx, fp32_first_output, fp32_second_output);
+  }
+
+  _num_data++;
+}
+
+// Report MAPE per output: the accumulated sum averaged over elements and
+// datasets, expressed in percent.
+void MAPEPrinter::dump(std::ostream &os) const
+{
+  os << "Mean Absolute Percentage Error (MAPE)" << std::endl;
+
+  for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+  {
+    const auto name = _output_names.at(output_idx);
+    const auto &inter = _intermediate.at(output_idx);
+    assert(inter.dtype() == loco::DataType::FLOAT32); // FIX_ME_UNLESS
+    const auto elem_count = inter.size<loco::DataType::FLOAT32>();
+
+    // Compute MAPE
+    float mape = 0.0;
+    for (uint32_t elem_idx = 0; elem_idx < elem_count; elem_idx++)
+      mape += inter.at<loco::DataType::FLOAT32>(elem_idx);
+
+    mape = mape / elem_count;
+    mape = mape / _num_data;
+    mape *= 100.0;
+
+    os << "MAPE for " << name << " is " << mape << "%" << std::endl;
+  }
+}
+
+// TODO Remove duplicate codes with MAEPrinter
+// Allocate one scalar accumulator per model output and record output names.
+void MPEIRPrinter::init(const luci::Module *first, const luci::Module *second)
+{
+  THROW_UNLESS(first != nullptr, "Invalid module.");
+  THROW_UNLESS(second != nullptr, "Invalid module.");
+
+  const auto first_output = loco::output_nodes(first->graph());
+  const auto second_output = loco::output_nodes(second->graph());
+
+  assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS
+
+  for (uint32_t i = 0; i < first_output.size(); i++)
+  {
+    const auto first_node = loco::must_cast<luci::CircleOutput *>(first_output[i]);
+    // NOTE(review): second_node is unused here — presumably kept for symmetry
+    // with the other printers; may trigger an unused-variable warning.
+    const auto second_node = loco::must_cast<luci::CircleOutput *>(second_output[i]);
+
+    // Create places to store intermediate results
+    _intermediate.emplace_back(0.0);
+
+    // Save output names for logging
+    _output_names.emplace_back(first_node->name());
+  }
+}
+
+// Accumulate PEIR (Peak Error to Interval Ratio)
+// PEIR = max(|a - b|) / (max(a) - min(a))
+// PEIR >= 0 (lower is better)
+void MPEIRPrinter::accum_peir(uint32_t output_idx, const std::shared_ptr<Tensor> &a,
+                              const std::shared_ptr<Tensor> &b)
+{
+  assert(a->dtype() == loco::DataType::FLOAT32 and
+         b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS
+  assert(same_shape(a.get(), b.get()));          // FIX_CALLER_UNLESS
+  assert(output_idx < _intermediate.size());     // FIX_CALLER_UNLESS
+
+  // First pass: find the value interval of the reference tensor 'a'
+  float min = std::numeric_limits<float>::max();
+  float max = std::numeric_limits<float>::lowest();
+
+  for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++)
+  {
+    const auto a_val = a->at<loco::DataType::FLOAT32>(i);
+    min = std::min(a_val, min);
+    max = std::max(a_val, max);
+  }
+
+  float interval = max - min;
+
+  // Corner case: All values are the same. We set interval = 1 in this case
+  if (interval == 0)
+    interval = 1.0;
+
+  // Second pass: find the peak absolute error between 'a' and 'b'
+  float peak_error = std::numeric_limits<float>::lowest();
+
+  for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++)
+  {
+    const auto a_val = a->at<loco::DataType::FLOAT32>(i);
+    const auto b_val = b->at<loco::DataType::FLOAT32>(i);
+    const auto error = std::abs(a_val - b_val);
+    peak_error = std::max(error, peak_error);
+  }
+
+  _intermediate.at(output_idx) += peak_error / interval;
+}
+
+// Assumption (when testing the accuracy of quantized model)
+// first: the result of fp32 model
+// second: the result of fake-quantized model
+// Accumulate one dataset's PEIR per output and bump the dataset counter.
+void MPEIRPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+                              const std::vector<std::shared_ptr<Tensor>> &second)
+{
+  assert(first.size() == second.size());        // FIX_CALLER_UNLESS
+  assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS
+
+  for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+  {
+    const auto first_output = first[output_idx];
+    const auto second_output = second[output_idx];
+
+    // Cast data to fp32 for ease of computation
+    const auto fp32_first_output = fp32(first_output);
+    const auto fp32_second_output = fp32(second_output);
+
+    accum_peir(output_idx, fp32_first_output, fp32_second_output);
+  }
+
+  _num_data++;
+}
+
+// Report MPEIR per output: accumulated PEIR averaged over datasets.
+void MPEIRPrinter::dump(std::ostream &os) const
+{
+  os << "Mean Peak Error to Interval Ratio (MPEIR)" << std::endl;
+
+  for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+  {
+    const auto name = _output_names.at(output_idx);
+    const auto sum_of_peir = _intermediate.at(output_idx);
+
+    // Compute MPEIR
+    float mpeir = sum_of_peir / _num_data;
+
+    os << "MPEIR for " << name << " is " << mpeir << std::endl;
+  }
+}
+
+// TODO Remove duplicate codes with MAEPrinter
+// Allocate one scalar accumulator per model output, record output names, and
+// put outputs with fewer than k elements on the skip list (Top-k match is
+// undefined for them).
+void TopKMatchPrinter::init(const luci::Module *first, const luci::Module *second)
+{
+  THROW_UNLESS(first != nullptr, "Invalid module.");
+  THROW_UNLESS(second != nullptr, "Invalid module.");
+
+  const auto first_output = loco::output_nodes(first->graph());
+  const auto second_output = loco::output_nodes(second->graph());
+
+  assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS
+
+  for (uint32_t i = 0; i < first_output.size(); i++)
+  {
+    const auto first_node = loco::must_cast<luci::CircleOutput *>(first_output[i]);
+    const auto second_node = loco::must_cast<luci::CircleOutput *>(second_output[i]);
+
+    // Create places to store intermediate results
+    _intermediate.emplace_back(0.0);
+
+    // Save output names for logging
+    _output_names.emplace_back(first_node->name());
+
+    // If num_elems of an output is less than k,
+    // the output index is added to the skip list
+    if (num_elems(first_node) < _k)
+    {
+      // NOTE space added before "metric" (message used to print "Top-5metric")
+      std::cout << "Top-" << _k << " metric for " << first_node->name()
+                << " is ignored, because it has elements less than " << _k << std::endl;
+      _skip_output.emplace_back(i);
+    }
+  }
+}
+
+// Accumulate the ratio of Top-k indices shared between 'a' and 'b'
+// (1.0 when both tensors rank the same k elements highest, 0.0 when none
+// match). Ties are broken toward the earlier index.
+void TopKMatchPrinter::accum_topk_accuracy(uint32_t output_idx, const std::shared_ptr<Tensor> &a,
+                                           const std::shared_ptr<Tensor> &b)
+{
+  assert(a->dtype() == loco::DataType::FLOAT32 and
+         b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS
+  assert(same_shape(a.get(), b.get()));          // FIX_CALLER_UNLESS
+  assert(output_idx < _intermediate.size());     // FIX_CALLER_UNLESS
+
+  // Find Top-k largest elements
+  // This implementation is a variant of "Method 2 (Use temporary array)" in
+  // https://www.geeksforgeeks.org/k-largestor-smallest-elements-in-an-array/
+  // We sort top-k elements by value and index to ensure that the element with an earlier
+  // index comes first if multiple elements have the same value.
+  auto find_topk = [this](const std::shared_ptr<Tensor> &tensor) {
+    assert(_k <= tensor->size<loco::DataType::FLOAT32>()); // FIX_CALLER_UNLESS
+
+    // first: value, second: index
+    std::vector<std::pair<float, uint32_t>> topk;
+    topk.resize(_k);
+
+    // Initialize
+    for (uint32_t i = 0; i < _k; i++)
+    {
+      topk[i] = std::make_pair(tensor->at<loco::DataType::FLOAT32>(i), i);
+    }
+
+    // Input pair: (value, index)
+    // Return true if a has smaller value than b. If a and b have the same value,
+    // return true if a has larger index.
+    auto compare = [](const std::pair<float, uint32_t> &a, const std::pair<float, uint32_t> &b) {
+      if (a.first == b.first)
+        return a.second > b.second;
+
+      return a.first < b.first;
+    };
+
+    for (uint32_t i = _k; i < tensor->size<loco::DataType::FLOAT32>(); i++)
+    {
+      auto val = std::make_pair(tensor->at<loco::DataType::FLOAT32>(i), i);
+
+      auto min = std::min_element(topk.begin(), topk.end(), compare);
+      if (compare(*min, val))
+      {
+        // val is larger than min. Replace min with val.
+        auto min_index = std::distance(topk.begin(), min);
+        topk[min_index] = val;
+      }
+    }
+
+    return topk;
+  };
+
+  auto first_topk = find_topk(a);
+  auto second_topk = find_topk(b);
+
+  // Count how many Top-k indices of 'a' also appear in Top-k of 'b'
+  uint32_t matched = 0;
+  for (uint32_t i = 0; i < _k; i++)
+  {
+    for (uint32_t j = 0; j < _k; j++)
+    {
+      if (first_topk[i].second == second_topk[j].second)
+      {
+        matched++;
+        break;
+      }
+    }
+  }
+
+  float matched_ratio = static_cast<float>(matched) / _k;
+
+  _intermediate.at(output_idx) += matched_ratio;
+}
+
+bool TopKMatchPrinter::in_skip_list(uint32_t output_index) const
+{
+ for (auto skip : _skip_output)
+ {
+ if (output_index == skip)
+ return true;
+ }
+
+ return false;
+}
+
+void TopKMatchPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second)
+{
+ assert(first.size() == second.size()); // FIX_CALLER_UNLESS
+ assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+ {
+ if (in_skip_list(output_idx))
+ continue;
+
+ const auto first_output = first[output_idx];
+ const auto second_output = second[output_idx];
+
+ // Cast data to fp32 for ease of computation
+ const auto fp32_first_output = fp32(first_output);
+ const auto fp32_second_output = fp32(second_output);
+
+ accum_topk_accuracy(output_idx, fp32_first_output, fp32_second_output);
+ }
+
+ _num_data++;
+}
+
+void TopKMatchPrinter::dump(std::ostream &os) const
+{
+ os << "Ratio of Matched Indices between Top-" << _k << " results of the models" << std::endl;
+
+ for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+ {
+ if (in_skip_list(output_idx))
+ continue;
+
+ const auto name = _output_names.at(output_idx);
+ const auto sum_of_topk_accuracy = _intermediate.at(output_idx);
+
+ // Compute TopKMatch
+ float mean_topk = sum_of_topk_accuracy / _num_data;
+
+ os << "Mean Top-" << _k << " match ratio for " << name << " is " << mean_topk << std::endl;
+ }
+}
+
+void MSEPrinter::init(const luci::Module *first, const luci::Module *second)
+{
+ THROW_UNLESS(first != nullptr, "Invalid module.");
+ THROW_UNLESS(second != nullptr, "Invalid module.");
+
+ const auto first_output = loco::output_nodes(first->graph());
+ const auto second_output = loco::output_nodes(second->graph());
+
+ assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t i = 0; i < first_output.size(); i++)
+ {
+ const auto first_node = loco::must_cast<luci::CircleNode *>(first_output[i]);
+ const auto second_node = loco::must_cast<luci::CircleNode *>(second_output[i]);
+
+ // Create tensors to store intermediate results
+ _intermediate.emplace_back();
+ _intermediate.at(i).dtype(loco::DataType::FLOAT32);
+ // NOTE Use both first_node and second_node to avoid release build break
+ _intermediate.at(i).rank(first_node->rank());
+ uint32_t num_elems = 1;
+ for (uint32_t j = 0; j < second_node->rank(); j++)
+ {
+ _intermediate.at(i).dim(j) = second_node->dim(j);
+ num_elems *= second_node->dim(j).value();
+ }
+ _intermediate.at(i).size<loco::DataType::FLOAT32>(num_elems);
+
+    // Check the buffer is initialized with zero
+ for (uint32_t j = 0; j < num_elems; j++)
+ assert(_intermediate.at(i).at<loco::DataType::FLOAT32>(j) == 0.0);
+
+ // Save output names for logging
+ _output_names.emplace_back(first_node->name());
+ }
+}
+
+void MSEPrinter::accum_squared_error(uint32_t output_idx, const std::shared_ptr<Tensor> &a,
+ const std::shared_ptr<Tensor> &b)
+{
+ assert(a->dtype() == loco::DataType::FLOAT32 and
+ b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS
+ assert(same_shape(a.get(), b.get())); // FIX_CALLER_UNLESS
+ assert(output_idx < _intermediate.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++)
+ {
+ _intermediate.at(output_idx).at<loco::DataType::FLOAT32>(i) +=
+ (a->at<loco::DataType::FLOAT32>(i) - b->at<loco::DataType::FLOAT32>(i)) *
+ (a->at<loco::DataType::FLOAT32>(i) - b->at<loco::DataType::FLOAT32>(i));
+ }
+}
+
+void MSEPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second)
+{
+ assert(first.size() == second.size()); // FIX_CALLER_UNLESS
+ assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS
+
+ for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+ {
+ const auto first_output = first[output_idx];
+ const auto second_output = second[output_idx];
+
+ // Cast data to fp32 and then compute absolute error
+ const auto fp32_first_output = fp32(first_output);
+ const auto fp32_second_output = fp32(second_output);
+
+ accum_squared_error(output_idx, fp32_first_output, fp32_second_output);
+ }
+
+ _num_data++;
+}
+
+void MSEPrinter::dump(std::ostream &os) const
+{
+ os << "Mean Squared Error (MSE)" << std::endl;
+
+ for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+ {
+ const auto name = _output_names.at(output_idx);
+ const auto &inter = _intermediate.at(output_idx);
+ assert(inter.dtype() == loco::DataType::FLOAT32); // FIX_ME_UNLESS
+ const auto elem_count = inter.size<loco::DataType::FLOAT32>();
+
+ // Compute MSE
+ float mse = 0.0;
+ for (uint32_t elem_idx = 0; elem_idx < elem_count; elem_idx++)
+ mse += inter.at<loco::DataType::FLOAT32>(elem_idx);
+
+ mse = mse / elem_count;
+ mse = mse / _num_data;
+
+ os << "MSE for " << name << " is " << mse << std::endl;
+ }
+}
+
} // namespace circle_eval_diff
#undef THROW_UNLESS
diff --git a/compiler/circle-eval-diff/src/MetricPrinter.h b/compiler/circle-eval-diff/src/MetricPrinter.h
index b51581c31..c8f27511c 100644
--- a/compiler/circle-eval-diff/src/MetricPrinter.h
+++ b/compiler/circle-eval-diff/src/MetricPrinter.h
@@ -85,6 +85,133 @@ private:
uint32_t _num_data = 0;
};
+// Mean Squared Error
+class MSEPrinter final : public MetricPrinter
+{
+public:
+ void init(const luci::Module *first, const luci::Module *second);
+
+ void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second);
+
+ void dump(std::ostream &os) const;
+
+private:
+ void accum_squared_error(uint32_t index, const std::shared_ptr<Tensor> &a,
+ const std::shared_ptr<Tensor> &b);
+
+private:
+  // Store accumulated sum of squared error for each output
+ std::vector<Tensor> _intermediate;
+ std::vector<std::string> _output_names;
+ uint32_t _num_data = 0;
+};
+
+// Mean Absolute Percentage Error
+class MAPEPrinter final : public MetricPrinter
+{
+public:
+ void init(const luci::Module *first, const luci::Module *second);
+
+ void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second);
+
+ void dump(std::ostream &os) const;
+
+private:
+ void accum_mean_absolute_error(uint32_t index, const std::shared_ptr<Tensor> &a,
+ const std::shared_ptr<Tensor> &b);
+
+private:
+  // Store accumulated sum of absolute percentage error for each output
+ std::vector<Tensor> _intermediate;
+ std::vector<std::string> _output_names;
+ uint32_t _num_data = 0;
+};
+
+// Mean Peak Error to Interval Ratio (PEIR)
+// PEIR = max(|a - b|) / (max(a) - min(a))
+// PEIR >= 0 (lower is better)
+//
+// When testing the accuracy of quantized model,
+// the first model should be the original fp32 model, and
+// the second model should be the fake-quantized fp32 model
+class MPEIRPrinter final : public MetricPrinter
+{
+public:
+ void init(const luci::Module *first, const luci::Module *second);
+
+ void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second);
+
+ void dump(std::ostream &os) const;
+
+private:
+ void accum_peir(uint32_t index, const std::shared_ptr<Tensor> &a,
+ const std::shared_ptr<Tensor> &b);
+
+private:
+ // Store accumulated sum of PEIR for each output
+ std::vector<float> _intermediate;
+ std::vector<std::string> _output_names;
+ uint32_t _num_data = 0;
+};
+
+// Ratio of matched indices between top-k results of two models (a, b).
+//
+// top-k match = intersection(top_k_idx(a), top_k_idx(b)) / k
+// mean top-k match = sum(top-k match) / num_data
+//
+// For example,
+// num_data = 2
+// first model output = [1, 2, 3], [2, 3, 1]
+// second model output = [2, 4, 6], [3, 2, 1]
+//
+// if k = 1,
+// first model top-1 index = ([2], [1])
+// second model top-1 index = ([2], [0])
+// mean top-1 accuracy = (1 + 0) / 2 = 0.5
+//
+// if k = 2,
+// first model output = [1, 2, 3], [2, 3, 1]
+// second model output = [2, 4, 6], [3, 2, 1]
+// first model top-2 index = ([2, 1], [1, 0])
+// second model top-2 index = ([2, 1], [0, 1])
+// mean top-2 accuracy = (2 + 2) / 4 = 1
+//
+// NOTE Order of elements is ignored when comparing two top-k sets.
+// NOTE If two elements have the same value and only one can be included in top-k,
+// the one with an earlier index will be included.
+class TopKMatchPrinter : public MetricPrinter
+{
+public:
+ TopKMatchPrinter(uint32_t k) : _k(k) {}
+
+public:
+ void init(const luci::Module *first, const luci::Module *second);
+
+ void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+ const std::vector<std::shared_ptr<Tensor>> &second);
+
+ void dump(std::ostream &os) const;
+
+private:
+ void accum_topk_accuracy(uint32_t index, const std::shared_ptr<Tensor> &a,
+ const std::shared_ptr<Tensor> &b);
+
+ // Return true if the output is in the skip list (_skip_output)
+ bool in_skip_list(uint32_t output_index) const;
+
+private:
+ const uint32_t _k = 0;
+ // Store accumulated accuracy
+ std::vector<float> _intermediate;
+ std::vector<std::string> _output_names;
+ uint32_t _num_data = 0;
+ // Save index of output whose num_elements is less than k
+ std::vector<uint32_t> _skip_output;
+};
+
} // namespace circle_eval_diff
#endif // __CIRCLE_EVAL_DIFF_METRIC_PRINTER_H__
diff --git a/compiler/circle-eval-diff/src/MetricPrinter.test.cpp b/compiler/circle-eval-diff/src/MetricPrinter.test.cpp
index 51ca89799..0e71b80cc 100644
--- a/compiler/circle-eval-diff/src/MetricPrinter.test.cpp
+++ b/compiler/circle-eval-diff/src/MetricPrinter.test.cpp
@@ -180,6 +180,23 @@ std::shared_ptr<Tensor> output_tensor_with_value(const luci::Module *module, flo
return tensor;
}
+std::shared_ptr<Tensor> output_tensor_with_value(const luci::Module *module,
+ std::vector<float> &value)
+{
+ auto outputs = loco::output_nodes(module->graph());
+ assert(outputs.size() == 1);
+ auto output = *outputs.begin();
+ auto output_cnode = loco::must_cast<luci::CircleNode *>(output);
+ auto tensor = create_empty_tensor(output_cnode);
+ auto tensor_size = tensor->size<loco::DataType::FLOAT32>();
+ assert(tensor_size == value.size());
+ for (uint32_t i = 0; i < tensor_size; i++)
+ {
+ tensor->at<loco::DataType::FLOAT32>(i) = value[i];
+ }
+ return tensor;
+}
+
} // namespace
namespace circle_eval_diff
@@ -233,4 +250,299 @@ TEST(CircleEvalMetricPrinterTest, MAE_init_with_null_NEG)
EXPECT_ANY_THROW(mae.init(nullptr, nullptr));
}
+TEST(CircleEvalMetricPrinterTest, MAPE_simple)
+{
+ luci::Module first;
+ AddOneGraph first_g;
+ first_g.init();
+
+ first.add(std::move(first_g.graph()));
+
+ luci::Module second;
+ AddTwoGraph second_g;
+ second_g.init();
+
+ second.add(std::move(second_g.graph()));
+
+ MAPEPrinter mape;
+
+ mape.init(&first, &second);
+
+ // This test does not actually evaluate the modules, but create
+ // fake results.
+ std::vector<std::shared_ptr<Tensor>> first_result;
+ {
+ auto output = output_tensor_with_value(&first, 2.0);
+ first_result.emplace_back(output);
+ }
+
+ std::vector<std::shared_ptr<Tensor>> second_result;
+ {
+ auto output = output_tensor_with_value(&second, 1.0);
+ second_result.emplace_back(output);
+ }
+
+ mape.accumulate(first_result, second_result);
+
+ std::stringstream ss;
+ mape.dump(ss);
+ std::string result = ss.str();
+
+ EXPECT_NE(std::string::npos, result.find("MAPE for output_0 is 50%"));
+}
+
+TEST(CircleEvalMetricPrinterTest, MAPE_init_with_null_NEG)
+{
+ MAPEPrinter mape;
+
+ EXPECT_ANY_THROW(mape.init(nullptr, nullptr));
+}
+
+TEST(CircleEvalMetricPrinterTest, MPEIR_simple)
+{
+ luci::Module first;
+ AddOneGraph first_g;
+ first_g.init();
+
+ first.add(std::move(first_g.graph()));
+
+ luci::Module second;
+ AddTwoGraph second_g;
+ second_g.init();
+
+ second.add(std::move(second_g.graph()));
+
+ MPEIRPrinter mpeir;
+
+ mpeir.init(&first, &second);
+
+ // This test does not actually evaluate the modules, but create
+ // fake results.
+ std::vector<std::shared_ptr<Tensor>> first_result;
+ {
+ std::vector<float> val;
+ val.resize(16);
+ for (uint32_t i = 0; i < 16; i++)
+ val[i] = i;
+
+ auto output = output_tensor_with_value(&first, val);
+ first_result.emplace_back(output);
+ }
+
+ std::vector<std::shared_ptr<Tensor>> second_result;
+ {
+ auto output = output_tensor_with_value(&second, 0.0);
+ second_result.emplace_back(output);
+ }
+
+ mpeir.accumulate(first_result, second_result);
+
+ std::stringstream ss;
+ mpeir.dump(ss);
+ std::string result = ss.str();
+
+ EXPECT_NE(std::string::npos, result.find("MPEIR for output_0 is 1"));
+}
+
+TEST(CircleEvalMetricPrinterTest, MPEIR_init_with_null_NEG)
+{
+ MPEIRPrinter mpeir;
+
+ EXPECT_ANY_THROW(mpeir.init(nullptr, nullptr));
+}
+
+TEST(CircleEvalMetricPrinterTest, TopK_simple)
+{
+ luci::Module first;
+ AddOneGraph first_g;
+ first_g.init();
+
+ first.add(std::move(first_g.graph()));
+
+ luci::Module second;
+ AddTwoGraph second_g;
+ second_g.init();
+
+ second.add(std::move(second_g.graph()));
+
+ TopKMatchPrinter top5(5);
+
+ top5.init(&first, &second);
+
+ // This test does not actually evaluate the modules, but create
+ // fake results.
+ std::vector<std::shared_ptr<Tensor>> first_result;
+ {
+ std::vector<float> val;
+ val.resize(16);
+ for (uint32_t i = 0; i < 16; i++)
+ val[i] = i;
+
+ auto output = output_tensor_with_value(&first, val);
+ first_result.emplace_back(output);
+ }
+
+ std::vector<std::shared_ptr<Tensor>> second_result;
+ {
+ std::vector<float> val;
+ val.resize(16);
+ for (uint32_t i = 0; i < 16; i++)
+ val[i] = i * 2;
+ auto output = output_tensor_with_value(&second, val);
+ second_result.emplace_back(output);
+ }
+
+ top5.accumulate(first_result, second_result);
+
+ std::stringstream ss;
+ top5.dump(ss);
+ std::string result = ss.str();
+
+ EXPECT_NE(std::string::npos, result.find("Mean Top-5 match ratio for output_0 is 1"));
+}
+
+TEST(CircleEvalMetricPrinterTest, TopK_tie)
+{
+ luci::Module first;
+ AddOneGraph first_g;
+ first_g.init();
+
+ first.add(std::move(first_g.graph()));
+
+ luci::Module second;
+ AddTwoGraph second_g;
+ second_g.init();
+
+ second.add(std::move(second_g.graph()));
+
+ TopKMatchPrinter top5(5);
+
+ top5.init(&first, &second);
+
+ // This test does not actually evaluate the modules, but create
+ // fake results.
+ std::vector<std::shared_ptr<Tensor>> first_result;
+ {
+ std::vector<float> val;
+ val.resize(16);
+ for (uint32_t i = 0; i < 16; i++)
+ val[i] = i;
+
+ auto output = output_tensor_with_value(&first, val);
+ first_result.emplace_back(output);
+ }
+
+ std::vector<std::shared_ptr<Tensor>> second_result;
+ {
+ std::vector<float> val{12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 14, 15, 16};
+
+ auto output = output_tensor_with_value(&second, val);
+ second_result.emplace_back(output);
+ }
+
+ top5.accumulate(first_result, second_result);
+
+ std::stringstream ss;
+ top5.dump(ss);
+ std::string result = ss.str();
+
+ EXPECT_NE(std::string::npos, result.find("Mean Top-5 match ratio for output_0 is 0.8"));
+}
+
+TEST(CircleEvalMetricPrinterTest, TopK_num_elem_less_than_k_NEG)
+{
+ luci::Module first;
+ AddOneGraph first_g;
+ first_g.init();
+
+ first.add(std::move(first_g.graph()));
+
+ luci::Module second;
+ AddTwoGraph second_g;
+ second_g.init();
+
+ second.add(std::move(second_g.graph()));
+
+ TopKMatchPrinter top100(100);
+
+ top100.init(&first, &second);
+
+ // This test does not actually evaluate the modules, but create
+ // fake results.
+ std::vector<std::shared_ptr<Tensor>> first_result;
+ {
+ auto output = output_tensor_with_value(&first, 0);
+ first_result.emplace_back(output);
+ }
+
+ std::vector<std::shared_ptr<Tensor>> second_result;
+ {
+ auto output = output_tensor_with_value(&second, 0);
+ second_result.emplace_back(output);
+ }
+
+ top100.accumulate(first_result, second_result);
+
+ std::stringstream ss;
+ top100.dump(ss);
+ std::string result = ss.str();
+
+ EXPECT_EQ(std::string::npos, result.find("Mean Top-100 match ratio"));
+}
+
+TEST(CircleEvalMetricPrinterTest, TopK_init_with_null_NEG)
+{
+ TopKMatchPrinter topk(5);
+
+ EXPECT_ANY_THROW(topk.init(nullptr, nullptr));
+}
+
+TEST(CircleEvalMetricPrinterTest, MSE_simple)
+{
+ luci::Module first;
+ AddOneGraph first_g;
+ first_g.init();
+
+ first.add(std::move(first_g.graph()));
+
+ luci::Module second;
+ AddTwoGraph second_g;
+ second_g.init();
+
+ second.add(std::move(second_g.graph()));
+
+ MSEPrinter mse;
+
+ mse.init(&first, &second);
+
+ // This test does not actually evaluate the modules, but create
+ // fake results.
+ std::vector<std::shared_ptr<Tensor>> first_result;
+ {
+ auto output = output_tensor_with_value(&first, 1.0);
+ first_result.emplace_back(output);
+ }
+
+ std::vector<std::shared_ptr<Tensor>> second_result;
+ {
+ auto output = output_tensor_with_value(&second, 2.0);
+ second_result.emplace_back(output);
+ }
+
+ mse.accumulate(first_result, second_result);
+
+ std::stringstream ss;
+ mse.dump(ss);
+ std::string result = ss.str();
+
+ EXPECT_NE(std::string::npos, result.find("MSE for output_0 is 1"));
+}
+
+TEST(CircleEvalMetricPrinterTest, MSE_init_with_null_NEG)
+{
+ MSEPrinter mse;
+
+ EXPECT_ANY_THROW(mse.init(nullptr, nullptr));
+}
+
} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/ModuleEvalDiff.cpp b/compiler/circle-eval-diff/src/ModuleEvalDiff.cpp
deleted file mode 100644
index 85f985873..000000000
--- a/compiler/circle-eval-diff/src/ModuleEvalDiff.cpp
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ModuleEvalDiff.h"
-#include "Tensor.h"
-
-#include <luci_interpreter/Interpreter.h>
-#include <dio_hdf5/HDF5Importer.h>
-
-#include <string>
-#include <stdexcept>
-#include <iostream>
-#include <cassert>
-
-using Tensor = circle_eval_diff::Tensor;
-using DataType = loco::DataType;
-using Shape = std::vector<loco::Dimension>;
-using HDF5Importer = dio::hdf5::HDF5Importer;
-
-namespace
-{
-
-// Check the type and the shape of CircleInput
-void verifyTypeShape(const luci::CircleInput *input_node, const DataType &dtype, const Shape &shape)
-{
- // Type check
- if (dtype != input_node->dtype())
- throw std::runtime_error("Wrong input type.");
-
- if (shape.size() != input_node->rank())
- throw std::runtime_error("Input rank mismatch.");
-
- for (uint32_t i = 0; i < shape.size(); i++)
- {
- if (not(shape.at(i) == input_node->dim(i)))
- throw std::runtime_error("Input shape mismatch.");
- }
-}
-
-// Return number of elements of the node.
-uint32_t numElements(const luci::CircleNode *node)
-{
- uint32_t num_elem = 1;
- for (uint32_t i = 0; i < node->rank(); ++i)
- num_elem *= node->dim(i).value();
- return num_elem;
-}
-
-// Return Tensor which has the same dtype and shape with node.
-// Buffer does not have any data yet.
-std::shared_ptr<Tensor> createEmptyTensor(const luci::CircleNode *node)
-{
- auto tensor = std::make_shared<Tensor>();
- {
- tensor->dtype(node->dtype());
- tensor->rank(node->rank());
- for (uint32_t i = 0; i < node->rank(); i++)
- tensor->dim(i) = node->dim(i);
-
- switch (node->dtype())
- {
- case loco::DataType::FLOAT32:
- tensor->size<loco::DataType::FLOAT32>(numElements(node));
- break;
- case loco::DataType::U8:
- tensor->size<loco::DataType::U8>(numElements(node));
- break;
- case loco::DataType::S16:
- tensor->size<loco::DataType::S16>(numElements(node));
- break;
- case loco::DataType::S32:
- tensor->size<loco::DataType::S32>(numElements(node));
- break;
- case loco::DataType::S64:
- tensor->size<loco::DataType::S64>(numElements(node));
- break;
- default:
- throw std::runtime_error("Unsupported input tensor dtype for " + node->name());
- }
- }
-
- return tensor;
-}
-
-} // namespace
-
-namespace circle_eval_diff
-{
-
-void H5InputEvalDiff::evalDiff(const std::string &first_input_data_path,
- const std::string &second_input_data_path) const
-{
- const auto interp = std::make_unique<luci_interpreter::Interpreter>(_first_module.get());
-
- _metric->init(_first_module.get(), _second_module.get());
-
- try
- {
- HDF5Importer first_h5(first_input_data_path);
- first_h5.importGroup("value");
-
- HDF5Importer second_h5(second_input_data_path);
- second_h5.importGroup("value");
-
- const auto first_num_data = first_h5.numData();
- const auto second_num_data = second_h5.numData();
-
- if (first_num_data != second_num_data)
- throw std::runtime_error(
- "Number of data in the first data file and the second data file mismatches.");
-
- if (first_num_data == 0)
- throw std::runtime_error("Input data file does not contain any record.");
-
- const auto first_input_nodes = loco::input_nodes(_first_module->graph());
- const auto first_num_inputs = first_input_nodes.size();
- const auto first_output_nodes = loco::output_nodes(_first_module->graph());
- const auto first_num_outputs = first_output_nodes.size();
-
- const auto second_input_nodes = loco::input_nodes(_second_module->graph());
- const auto second_num_inputs = second_input_nodes.size();
- const auto second_output_nodes = loco::output_nodes(_second_module->graph());
- const auto second_num_outputs = second_output_nodes.size();
-
- for (int32_t data_idx = 0; data_idx < first_num_data; data_idx++)
- {
- std::cout << "Evaluating " << data_idx << "'th data" << std::endl;
-
- if (first_num_inputs != first_h5.numInputs(data_idx) ||
- second_num_inputs != second_h5.numInputs(data_idx))
- throw std::runtime_error("Wrong number of inputs in " + std::to_string(data_idx) +
- "th data.");
-
- // Do inference and return output
- auto eval = [&](HDF5Importer &h5, uint32_t num_inputs,
- const std::vector<loco::Node *> &input_nodes, uint32_t num_outputs,
- const std::vector<loco::Node *> &output_nodes) {
- // Write input data
- for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
- {
- const auto *input_node =
- loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
- assert(input_node->index() == input_idx);
-
- auto tensor = createEmptyTensor(input_node);
- if (h5.isRawData())
- {
- h5.readTensor(data_idx, input_idx, tensor->buffer());
- }
- else
- {
- DataType dtype;
- Shape shape;
- h5.readTensor(data_idx, input_idx, &dtype, &shape, tensor->buffer());
-
- // Check the type and the shape of the input data is valid
- verifyTypeShape(input_node, dtype, shape);
- }
-
- interp->writeInputTensor(input_node, tensor->buffer(), tensor->byte_size());
- }
-
- // Interpret
- interp->interpret();
-
- // Read output data
- std::vector<std::shared_ptr<Tensor>> outputs;
- for (uint32_t output_idx = 0; output_idx < num_outputs; output_idx++)
- {
- const auto *output_node =
- loco::must_cast<const luci::CircleOutput *>(output_nodes[output_idx]);
- assert(output_node->index() == output_idx);
-
- auto tensor = createEmptyTensor(output_node);
- interp->readOutputTensor(output_node, tensor->buffer(), tensor->byte_size());
- outputs.emplace_back(tensor);
- }
-
- return outputs;
- };
-
- auto first_output =
- eval(first_h5, first_num_inputs, first_input_nodes, first_num_outputs, first_output_nodes);
- auto second_output = eval(second_h5, second_num_inputs, second_input_nodes,
- second_num_outputs, second_output_nodes);
-
- // Accumulate diffs
- _metric->accumulate(first_output, second_output);
- }
-
- std::cout << "Evaluation finished. Number of data: " << first_num_data << std::endl;
- }
- catch (const H5::Exception &e)
- {
- H5::Exception::printErrorStack();
- throw std::runtime_error("HDF5 error occurred.");
- }
-
- // Print metric
- std::cout << _metric.get() << std::endl;
-}
-
-} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/ModuleEvalDiff.h b/compiler/circle-eval-diff/src/ModuleEvalDiff.h
deleted file mode 100644
index c7642f60b..000000000
--- a/compiler/circle-eval-diff/src/ModuleEvalDiff.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CIRCLE_EVAL_DIFF_MODULE_EVAL_DIFF_H__
-#define __CIRCLE_EVAL_DIFF_MODULE_EVAL_DIFF_H__
-
-#include "MetricPrinter.h"
-
-#include <luci/IR/Module.h>
-
-#include <memory>
-
-namespace circle_eval_diff
-{
-
-class ModuleEvalDiff
-{
-public:
- ModuleEvalDiff(std::unique_ptr<luci::Module> &&first, std::unique_ptr<luci::Module> &&second,
- std::unique_ptr<MetricPrinter> &&metric)
- : _first_module(std::move(first)), _second_module(std::move(second)), _metric(std::move(metric))
- {
- }
-
- virtual ~ModuleEvalDiff() = default;
-
- // Implement this in the child class
- virtual void evalDiff(const std::string &first_input_data_path,
- const std::string &second_input_data_path) const = 0;
-
-protected:
- std::unique_ptr<luci::Module> _first_module;
- std::unique_ptr<luci::Module> _second_module;
- std::unique_ptr<MetricPrinter> _metric;
-};
-
-class H5InputEvalDiff final : public ModuleEvalDiff
-{
-public:
- H5InputEvalDiff(std::unique_ptr<luci::Module> &&first, std::unique_ptr<luci::Module> &&second,
- std::unique_ptr<MetricPrinter> &&metric)
- : ModuleEvalDiff(std::move(first), std::move(second), std::move(metric))
- {
- }
-
- void evalDiff(const std::string &first_input_data_path,
- const std::string &second_input_data_path) const;
-};
-
-// TODO Implement ModuleEvalDiff for random input and directory input
-
-} // namespace circle_eval_diff
-
-#endif // __CIRCLE_EVAL_DIFF_MODULE_EVAL_DIFF_H__
diff --git a/compiler/circle-eval-diff/src/Tensor.cpp b/compiler/circle-eval-diff/src/Tensor.cpp
index 6710e8c3d..c3efc44cd 100644
--- a/compiler/circle-eval-diff/src/Tensor.cpp
+++ b/compiler/circle-eval-diff/src/Tensor.cpp
@@ -16,8 +16,24 @@
#include "Tensor.h"
+#include <luci/IR/CircleNodeDecl.h>
+
#include <cassert>
+namespace
+{
+
+// Return number of elements of the node.
+uint32_t numElements(const luci::CircleNode *node)
+{
+ uint32_t num_elem = 1;
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ num_elem *= node->dim(i).value();
+ return num_elem;
+}
+
+} // namespace
+
namespace circle_eval_diff
{
@@ -69,4 +85,40 @@ INSTANTIATE(loco::DataType::FLOAT32);
#undef INSTANTIATE
+// Return Tensor which has the same dtype and shape with node.
+// Buffer does not have any data yet.
+std::shared_ptr<Tensor> createEmptyTensor(const luci::CircleNode *node)
+{
+ auto tensor = std::make_shared<Tensor>();
+ {
+ tensor->dtype(node->dtype());
+ tensor->rank(node->rank());
+ for (uint32_t i = 0; i < node->rank(); i++)
+ tensor->dim(i) = node->dim(i);
+
+ switch (node->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ tensor->size<loco::DataType::FLOAT32>(numElements(node));
+ break;
+ case loco::DataType::U8:
+ tensor->size<loco::DataType::U8>(numElements(node));
+ break;
+ case loco::DataType::S16:
+ tensor->size<loco::DataType::S16>(numElements(node));
+ break;
+ case loco::DataType::S32:
+ tensor->size<loco::DataType::S32>(numElements(node));
+ break;
+ case loco::DataType::S64:
+ tensor->size<loco::DataType::S64>(numElements(node));
+ break;
+ default:
+ throw std::runtime_error("Unsupported input tensor dtype for " + node->name());
+ }
+ }
+
+ return tensor;
+}
+
} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/Tensor.h b/compiler/circle-eval-diff/src/Tensor.h
index 65ab60638..d4f65d951 100644
--- a/compiler/circle-eval-diff/src/Tensor.h
+++ b/compiler/circle-eval-diff/src/Tensor.h
@@ -18,6 +18,7 @@
#define __CIRCLE_EVAL_DIFF_TENSOR_H__
#include <loco.h>
+#include <luci/IR/CircleNodeDecl.h>
#include <vector>
@@ -76,6 +77,8 @@ private:
std::vector<uint8_t> _data;
};
+std::shared_ptr<Tensor> createEmptyTensor(const luci::CircleNode *node);
+
} // namespace circle_eval_diff
#endif // __CIRCLE_EVAL_DIFF_TENSOR_H__
diff --git a/compiler/circle-eval-diff/src/Tensor.test.cpp b/compiler/circle-eval-diff/src/Tensor.test.cpp
index 3bdeaecdf..395865748 100644
--- a/compiler/circle-eval-diff/src/Tensor.test.cpp
+++ b/compiler/circle-eval-diff/src/Tensor.test.cpp
@@ -18,6 +18,8 @@
#include <gtest/gtest.h>
+#include <luci/IR/CircleNodes.h>
+
using Tensor = circle_eval_diff::Tensor;
namespace
@@ -99,3 +101,29 @@ TEST(CircleEvalDiffTensorTest, out_of_buffer_range_NEG)
SUCCEED();
}
+
+TEST(CircleEvalDiffTensorTest, createEmptyTensorTest)
+{
+ luci::CircleInput input;
+ input.dtype(loco::DataType::FLOAT32);
+ input.rank(4);
+ input.dim(0).set(1);
+ input.dim(1).set(3);
+ input.dim(2).set(3);
+ input.dim(3).set(2);
+
+ loco::DataType right_data_type{loco::DataType::FLOAT32};
+ std::vector<loco::Dimension> right_shape;
+ right_shape.emplace_back(1);
+ right_shape.emplace_back(3);
+ right_shape.emplace_back(3);
+ right_shape.emplace_back(2);
+
+ auto tensor = circle_eval_diff::createEmptyTensor(&input);
+ EXPECT_EQ(loco::DataType::FLOAT32, tensor->dtype());
+ EXPECT_EQ(4, tensor->rank());
+ EXPECT_EQ(1, tensor->dim(0));
+ EXPECT_EQ(3, tensor->dim(1));
+ EXPECT_EQ(3, tensor->dim(2));
+ EXPECT_EQ(2, tensor->dim(3));
+}
diff --git a/compiler/circle-execution-plan/CMakeLists.txt b/compiler/circle-execution-plan/CMakeLists.txt
index 2f657c171..da74e021d 100644
--- a/compiler/circle-execution-plan/CMakeLists.txt
+++ b/compiler/circle-execution-plan/CMakeLists.txt
@@ -1,3 +1,9 @@
+nnas_find_package(Jsoncpp)
+if(NOT Jsoncpp_FOUND)
+ message(STATUS "Build circle-execution-plan: FAILED (missing jsoncpp)")
+ return()
+endif(NOT Jsoncpp_FOUND)
+
set(SOURCES
pal/IScratchpadHelper.h
pal/ScratchpadHelperLinux.h
@@ -10,6 +16,9 @@ set(SOURCES
)
add_executable(circle_execution_plan "${SOURCES}")
+target_include_directories(circle_execution_plan PRIVATE ${Jsoncpp_INCLUDE_DIRS})
+
+target_link_libraries(circle_execution_plan ${Jsoncpp_STATIC_LIB})
target_link_libraries(circle_execution_plan foder)
target_link_libraries(circle_execution_plan safemain)
target_link_libraries(circle_execution_plan luci_env)
diff --git a/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp b/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp
index 1788124c3..d5ddf0ce9 100644
--- a/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp
+++ b/compiler/circle-execution-plan/src/CircleExecutionPlan.cpp
@@ -33,20 +33,22 @@ int entry(int argc, char **argv)
{
arser::Arser arser("circle_execution_plan provides model with execution plan meta information");
- arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
- arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
- arser.add_argument("--platform")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .default_value("linux")
- .help("Platform name: linux mcu cmsisnn");
+ arser.add_argument("input").help("Input circle model");
+ arser.add_argument("output").help("Output circle model");
+ arser.add_argument("--platform").default_value("linux").help("Platform name: linux mcu cmsisnn");
arser.add_argument("--use_dsp")
.nargs(1)
.type(arser::DataType::BOOL)
.required(false)
.default_value(false)
.help("Plan with or without dsp (now can be used only with cmsisnn)");
+ arser.add_argument("--save_allocations")
+ .nargs(1)
+ .required(false)
+ .default_value("")
+ .help("Path for output JSON file to save memory allocation info. "
+ "Note: path end of file should have 'tracealloc.json' (example path: "
+ "'../exec_plan_info.tracealloc.json')");
try
{
@@ -63,6 +65,7 @@ int entry(int argc, char **argv)
const std::string output_path = arser.get<std::string>("output");
const std::string platform_name = arser.get<std::string>("--platform");
const bool use_dsp = arser.get<bool>("--use_dsp");
+ const std::string json_path = arser.get<std::string>("--save_allocations");
if (platform_name != "cmsisnn" && use_dsp)
{
@@ -89,6 +92,13 @@ int entry(int argc, char **argv)
return EXIT_FAILURE;
}
+ bool is_save_allocations = false;
+
+ if (!json_path.empty())
+ {
+ is_save_allocations = true;
+ }
+
foder::FileLoader file_loader{input_path};
std::vector<char> model_data;
@@ -124,6 +134,9 @@ int entry(int argc, char **argv)
circle_planner::ExecutionPlanner execution_planner(module->graph(), {platform_type, use_dsp});
execution_planner.make_execution_plan();
+ if (is_save_allocations)
+ execution_planner.create_json_allocation_file(json_path);
+
// Export to output Circle file
luci::CircleExporter exporter;
luci::CircleFileExpContract contract(module.get(), output_path);
diff --git a/compiler/circle-execution-plan/src/ExecutionPlanner.cpp b/compiler/circle-execution-plan/src/ExecutionPlanner.cpp
index ec2ec1362..a1e6f7e1a 100644
--- a/compiler/circle-execution-plan/src/ExecutionPlanner.cpp
+++ b/compiler/circle-execution-plan/src/ExecutionPlanner.cpp
@@ -18,6 +18,9 @@
#include <loco/IR/Algorithm.h>
#include <luci/UserSettings.h>
+#include <json.h>
+#include <fstream>
+
namespace circle_planner
{
namespace
@@ -58,6 +61,29 @@ bool isTensorProducingNode(const luci::CircleNode *node)
}
}
+// Append allocation info for the given circle node to the JSON allocations array
+void create_allocation_node(Json::Value &allocations_node,
+ AllocationNodeInformation &alloca_node_inform, uint32_t alive_till_max,
+ luci::CircleNode *circle_node)
+{
+ Json::Value allocation_node;
+ if (alloca_node_inform.size == 0)
+ return;
+
+ allocation_node["offset"] = alloca_node_inform.offset;
+ allocation_node["size"] = alloca_node_inform.size;
+ allocation_node["alive_from"] = alloca_node_inform.first_node;
+
+ if (alloca_node_inform.last_node == node_not_assigned)
+ allocation_node["alive_till"] = alive_till_max + 1;
+ else
+ allocation_node["alive_till"] = alloca_node_inform.last_node;
+
+ allocation_node["origin"] = circle_node->name();
+
+ allocations_node.append(allocation_node);
+}
+
} // namespace
void ExecutionPlanner::make_execution_plan()
@@ -74,6 +100,50 @@ void ExecutionPlanner::make_execution_plan()
settings->set(luci::UserSettings::Key::ExecutionPlanGen, true);
}
+void ExecutionPlanner::create_json_allocation_file(const std::string &json_path)
+{
+ Json::Value main_tree;
+ Json::Value segments_node;
+ Json::Value allocations_node;
+
+ uint32_t alive_till_max = 0;
+
+ // Find max dealloc value to assign to nodes with node_not_assigned value
+ for (const auto elem : _dealloc_node)
+ {
+ if (alive_till_max < elem and elem != node_not_assigned)
+ alive_till_max = elem;
+ }
+
+ for (auto &alloc_node_inform : _alloc_node_inform_vector)
+ {
+ const auto node_num = alloc_node_inform.node_num;
+ const auto circle_node = loco::must_cast<luci::CircleNode *>(_ordered_nodes[node_num]);
+
+ create_allocation_node(allocations_node, alloc_node_inform, alive_till_max, circle_node);
+ }
+
+ // Create segment part
+ Json::Value segment_node;
+ segment_node["name"] = "Segment1";
+ segment_node["allocations"] = allocations_node;
+ segments_node.append(segment_node);
+
+ main_tree["schema_version"] = 1;
+ main_tree["segments"] = segments_node;
+
+ Json::StreamWriterBuilder builder;
+ const std::unique_ptr<Json::StreamWriter> writer(builder.newStreamWriter());
+
+ // Write to json file
+ std::ofstream out;
+ out.open(json_path);
+ if (out.is_open())
+ {
+ writer->write(main_tree, &out);
+ }
+}
+
void ExecutionPlanner::get_default_execution_order_plan()
{
// Get execution order in _ordered_nodes
diff --git a/compiler/circle-execution-plan/src/ExecutionPlanner.h b/compiler/circle-execution-plan/src/ExecutionPlanner.h
index e0833c407..af3fba33e 100644
--- a/compiler/circle-execution-plan/src/ExecutionPlanner.h
+++ b/compiler/circle-execution-plan/src/ExecutionPlanner.h
@@ -104,6 +104,8 @@ public:
_is_null_scratchpads = is_null_scratchpads;
};
+ void create_json_allocation_file(const std::string &json_path);
+
private:
// Method gets default execution order plan and saves it in _ordered_nodes vector.
// There can be different variants of execution order and this method provides main one.
diff --git a/compiler/circle-inspect/driver/Driver.cpp b/compiler/circle-inspect/driver/Driver.cpp
index 10e185de5..318a5826b 100644
--- a/compiler/circle-inspect/driver/Driver.cpp
+++ b/compiler/circle-inspect/driver/Driver.cpp
@@ -36,7 +36,7 @@ int entry(int argc, char **argv)
.help("Dump Conv2D series weight operators in circle file");
arser.add_argument("--op_version").nargs(0).help("Dump versions of the operators in circle file");
arser.add_argument("--tensor_dtype").nargs(0).help("Dump dtype of tensors");
- arser.add_argument("circle").type(arser::DataType::STR).help("Circle file to inspect");
+ arser.add_argument("circle").help("Circle file to inspect");
try
{
diff --git a/compiler/circle-inspect/requires.cmake b/compiler/circle-inspect/requires.cmake
index 362d67cf4..183dfe227 100644
--- a/compiler/circle-inspect/requires.cmake
+++ b/compiler/circle-inspect/requires.cmake
@@ -1,3 +1,4 @@
require("arser")
+require("foder")
require("mio-circle04")
require("safemain")
diff --git a/compiler/circle-inspect/src/Dump.cpp b/compiler/circle-inspect/src/Dump.cpp
index bba5e56c3..aa8fed248 100644
--- a/compiler/circle-inspect/src/Dump.cpp
+++ b/compiler/circle-inspect/src/Dump.cpp
@@ -15,7 +15,9 @@
*/
#include "Dump.h"
-#include "Reader.h"
+
+#include <mio_circle/Helper.h>
+#include <mio_circle/Reader.h>
#include <ostream>
@@ -24,7 +26,7 @@ namespace circleinspect
void DumpOperators::run(std::ostream &os, const circle::Model *model)
{
- circleinspect::Reader reader(model);
+ mio::circle::Reader reader(model);
const uint32_t subgraph_size = reader.num_subgraph();
@@ -50,7 +52,7 @@ void DumpOperators::run(std::ostream &os, const circle::Model *model)
namespace
{
-const circle::Operator *operator_match_output(circleinspect::Reader &reader, const int32_t tensor)
+const circle::Operator *operator_match_output(mio::circle::Reader &reader, const int32_t tensor)
{
auto ops = reader.operators();
@@ -58,7 +60,7 @@ const circle::Operator *operator_match_output(circleinspect::Reader &reader, con
{
const auto op = ops->Get(i);
- const std::vector<int32_t> &outputs = circleinspect::as_index_vector(op->outputs());
+ const std::vector<int32_t> &outputs = mio::circle::as_index_vector(op->outputs());
for (auto output : outputs)
{
@@ -69,7 +71,7 @@ const circle::Operator *operator_match_output(circleinspect::Reader &reader, con
return nullptr;
}
-size_t tensor_buffer_size(circleinspect::Reader &reader, const int32_t tensor_id)
+size_t tensor_buffer_size(mio::circle::Reader &reader, const int32_t tensor_id)
{
auto tensors = reader.tensors();
@@ -93,7 +95,7 @@ namespace circleinspect
void DumpConv2DWeight::run(std::ostream &os, const circle::Model *model)
{
- circleinspect::Reader reader(model);
+ mio::circle::Reader reader(model);
const uint32_t subgraph_size = reader.num_subgraph();
@@ -110,7 +112,7 @@ void DumpConv2DWeight::run(std::ostream &os, const circle::Model *model)
if (bc == circle::BuiltinOperator_CONV_2D || bc == circle::BuiltinOperator_DEPTHWISE_CONV_2D)
{
- const std::vector<int32_t> &inputs = circleinspect::as_index_vector(op->inputs());
+ const std::vector<int32_t> &inputs = mio::circle::as_index_vector(op->inputs());
if (inputs.size() < 2)
{
throw std::runtime_error("Operator has invalid input");
@@ -147,7 +149,7 @@ void DumpOperatorVersion::run(std::ostream &os, const circle::Model *model)
{
std::map<std::string, int32_t> op_version_map;
- circleinspect::Reader reader(model);
+ mio::circle::Reader reader(model);
// This assert is subject to be changed later
assert(reader.num_subgraph() == 1);
@@ -181,7 +183,7 @@ namespace circleinspect
void DumpTensorDType::run(std::ostream &os, const circle::Model *model)
{
- circleinspect::Reader reader(model);
+ mio::circle::Reader reader(model);
const uint32_t subgraph_size = reader.num_subgraph();
diff --git a/compiler/circle-inspect/src/Reader.cpp b/compiler/circle-inspect/src/Reader.cpp
deleted file mode 100644
index 0e2865254..000000000
--- a/compiler/circle-inspect/src/Reader.cpp
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Reader.h"
-
-#include <mio_circle/Helper.h>
-
-#include <sstream>
-#include <string>
-
-namespace circleinspect
-{
-
-Reader::Reader(const circle::Model *model)
-{
- _subgraphs = model->subgraphs();
- _buffers = model->buffers();
-
- auto opcodes = model->operator_codes();
- for (const ::circle::OperatorCode *opcode : *opcodes)
- {
- _op_codes.push_back(opcode);
- }
-}
-
-size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data)
-{
- if (buff_data != nullptr)
- {
- *buff_data = nullptr;
- }
-
- if (buf_idx == 0)
- return 0;
-
- if (auto *buffer = (*_buffers)[buf_idx])
- {
- if (auto *array = buffer->data())
- {
- if (size_t size = array->size())
- {
- if (buff_data != nullptr)
- {
- *buff_data = reinterpret_cast<const uint8_t *>(array->data());
- }
- return size;
- }
- }
- }
-
- return 0;
-}
-
-circle::BuiltinOperator Reader::builtin_code(const circle::Operator *op) const
-{
- uint32_t index = op->opcode_index();
- assert(index < _op_codes.size());
- const circle::OperatorCode *opcode = _op_codes.at(index);
-
- return mio::circle::builtin_code_neutral(opcode);
-}
-
-std::string Reader::opcode_name(const circle::Operator *op) const
-{
- uint32_t index = op->opcode_index();
- assert(index < _op_codes.size());
- const circle::OperatorCode *opcode = _op_codes.at(index);
-
- if (!mio::circle::is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid: " << index << ")";
- return oss.str();
- }
-
- return mio::circle::opcode_name(opcode);
-}
-
-std::string Reader::tensor_name(const circle::Tensor *tensor) const
-{
- return mio::circle::tensor_name(tensor);
-}
-
-std::string Reader::tensor_dtype(const circle::Tensor *tensor) const
-{
- return mio::circle::tensor_type(tensor);
-}
-
-bool Reader::select_subgraph(uint32_t sgindex)
-{
- _tensors = nullptr;
- _operators = nullptr;
-
- _inputs.clear();
- _outputs.clear();
-
- if (_subgraphs->Length() <= sgindex)
- {
- assert(false);
- return false;
- }
-
- const circle::SubGraph *subgraph = (*_subgraphs)[sgindex];
-
- _tensors = subgraph->tensors();
- _operators = subgraph->operators();
-
- _inputs = as_index_vector(subgraph->inputs());
- _outputs = as_index_vector(subgraph->outputs());
-
- return true;
-}
-
-} // namespace circleinspect
diff --git a/compiler/circle-inspect/src/Reader.h b/compiler/circle-inspect/src/Reader.h
deleted file mode 100644
index c38ec3990..000000000
--- a/compiler/circle-inspect/src/Reader.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __READER_H__
-#define __READER_H__
-
-#include <mio/circle/schema_generated.h>
-
-#include <map>
-#include <string>
-#include <vector>
-
-namespace circleinspect
-{
-
-template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array)
-{
- std::vector<T> ret(flat_array->Length());
- for (uint32_t i = 0; i < flat_array->Length(); i++)
- {
- ret[i] = flat_array->Get(i);
- }
- return ret;
-}
-
-/**
- * @brief Loads Circle file and provides helpers to access attributes
- */
-class Reader
-{
-private:
- using CircleSubGraphs_t = flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>;
- using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>;
- using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>;
- using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>;
-
-public:
- Reader(const circle::Model *model);
-
- Reader() = delete;
-
-public:
- const std::vector<const circle::OperatorCode *> &opcodes() { return _op_codes; }
- const CircleBuffers_t *buffers() { return _buffers; }
- const CircleTensors_t *tensors() { return _tensors; }
- const CircleOperators_t *operators() { return _operators; }
- const std::vector<int32_t> &inputs() const { return _inputs; }
- const std::vector<int32_t> &outputs() const { return _outputs; }
-
- uint32_t num_subgraph() const { return _subgraphs->Length(); }
-
- size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data);
- circle::BuiltinOperator builtin_code(const circle::Operator *op) const;
- std::string opcode_name(const circle::Operator *op) const;
- std::string tensor_name(const circle::Tensor *tensor) const;
- std::string tensor_dtype(const circle::Tensor *tensor) const;
-
-public:
- bool select_subgraph(uint32_t subgraph);
-
-private:
- const CircleSubGraphs_t *_subgraphs{nullptr};
- const CircleBuffers_t *_buffers{nullptr};
- const CircleTensors_t *_tensors{nullptr};
- const CircleOperators_t *_operators{nullptr};
-
- std::vector<const circle::OperatorCode *> _op_codes;
- std::vector<int32_t> _inputs;
- std::vector<int32_t> _outputs;
-};
-
-} // namespace circleinspect
-
-#endif // __READER_H__
diff --git a/compiler/circle-interpreter/CMakeLists.txt b/compiler/circle-interpreter/CMakeLists.txt
new file mode 100644
index 000000000..d18db3e11
--- /dev/null
+++ b/compiler/circle-interpreter/CMakeLists.txt
@@ -0,0 +1,13 @@
+set(INTERPRETER
+ src/CircleInterpreter.cpp
+ )
+
+add_executable(circle-interpreter ${INTERPRETER})
+target_link_libraries(circle-interpreter PRIVATE arser)
+target_link_libraries(circle-interpreter PRIVATE loco)
+target_link_libraries(circle-interpreter PRIVATE luci_import)
+target_link_libraries(circle-interpreter PRIVATE luci_interpreter)
+target_link_libraries(circle-interpreter PRIVATE safemain)
+target_link_libraries(circle-interpreter PRIVATE vconone)
+
+install(TARGETS circle-interpreter DESTINATION bin)
diff --git a/compiler/circle-interpreter/requires.cmake b/compiler/circle-interpreter/requires.cmake
new file mode 100644
index 000000000..a565df65b
--- /dev/null
+++ b/compiler/circle-interpreter/requires.cmake
@@ -0,0 +1,6 @@
+require("arser")
+require("loco")
+require("luci")
+require("luci-interpreter")
+require("safemain")
+require("vconone")
diff --git a/compiler/circle-interpreter/src/CircleInterpreter.cpp b/compiler/circle-interpreter/src/CircleInterpreter.cpp
new file mode 100644
index 000000000..1d241278d
--- /dev/null
+++ b/compiler/circle-interpreter/src/CircleInterpreter.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <arser/arser.h>
+#include <luci/ImporterEx.h>
+#include <luci_interpreter/Interpreter.h>
+#include <vconone/vconone.h>
+
+#include <cstdlib>
+#include <fstream>
+#include <vector>
+#include <string>
+
+namespace
+{
+
+void readDataFromFile(const std::string &filename, char *data, size_t data_size)
+{
+ std::ifstream fs(filename, std::ifstream::binary);
+ if (fs.fail())
+ throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+ if (fs.read(data, data_size).fail())
+ throw std::runtime_error("Failed to read data from file \"" + filename + "\".\n");
+}
+
+void writeDataToFile(const std::string &filename, const char *data, size_t data_size)
+{
+ std::ofstream fs(filename, std::ofstream::binary);
+ if (fs.fail())
+ throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+ if (fs.write(data, data_size).fail())
+ {
+ throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n");
+ }
+}
+
+template <typename NodeT> size_t getTensorSize(const NodeT *node)
+{
+ uint32_t tensor_size = loco::size(node->dtype());
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ tensor_size *= node->dim(i).value();
+ return tensor_size;
+}
+
+void print_version(void)
+{
+ std::cout << "circle-interpreter version " << vconone::get_string() << std::endl;
+ std::cout << vconone::get_copyright() << std::endl;
+}
+
+} // namespace
+
+/*
+ * @brief CircleInterpreter main
+ *
+ * Driver to invoke luci-interpreter
+ *
+ */
+int entry(int argc, char **argv)
+{
+ arser::Arser arser("Interpreter driver for circle models");
+
+ arser::Helper::add_version(arser, print_version);
+
+ arser.add_argument("model_path").help("Circle model filepath");
+ arser.add_argument("input_prefix")
+ .help("Input data filepath for circle model. "
+ "n-th input data is read from ${input_prefix}n, "
+ "for example, Add.circle.input0, Add.circle.input1");
+ arser.add_argument("output_prefix")
+ .help("Output data filepath for circle model. "
+ "Output data is written in ${output_file}n, "
+ "for example, Add.circle.output0");
+
+ try
+ {
+ arser.parse(argc, argv);
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cout << err.what() << std::endl;
+ std::cout << arser;
+ return EXIT_FAILURE;
+ }
+
+ const auto filename = arser.get<std::string>("model_path");
+ const auto input_prefix = arser.get<std::string>("input_prefix");
+ const auto output_prefix = arser.get<std::string>("output_prefix");
+
+ // Load model from the file
+ luci::ImporterEx importer;
+ std::unique_ptr<luci::Module> module = importer.importVerifyModule(filename);
+ if (module == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load '" << filename << "'" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ // Create interpreter.
+ luci_interpreter::Interpreter interpreter(module.get());
+
+ // Set input.
+ // Data for n'th input is read from ${input_prefix}n
+ // (ex: Add.circle.input0, Add.circle.input1 ..)
+ const auto input_nodes = loco::input_nodes(module->graph());
+ for (int32_t i = 0; i < input_nodes.size(); i++)
+ {
+ const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[i]);
+ std::vector<char> input_data(getTensorSize(input_node));
+ readDataFromFile(std::string(input_prefix) + std::to_string(i), input_data.data(),
+ input_data.size());
+ interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+ }
+
+ // Do inference.
+ interpreter.interpret();
+
+ // Get output.
+ const auto output_nodes = loco::output_nodes(module->graph());
+ for (int i = 0; i < module->graph()->outputs()->size(); i++)
+ {
+ const auto *output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
+ std::vector<char> output_data(getTensorSize(output_node));
+ interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
+
+    // Output data is written in ${output_prefix}n
+ // (ex: Add.circle.output0)
+ writeDataToFile(std::string(output_prefix) + std::to_string(i), output_data.data(),
+ output_data.size());
+ }
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/circle-operator-test/CMakeLists.txt b/compiler/circle-operator-test/CMakeLists.txt
new file mode 100644
index 000000000..2ebd533b9
--- /dev/null
+++ b/compiler/circle-operator-test/CMakeLists.txt
@@ -0,0 +1,18 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+get_target_property(ARTIFACTS_PATH testDataGenerator BINARY_DIR)
+get_target_property(CIRCLE_OPERATOR_PATH circle-operator BINARY_DIR)
+set(CIRCLE_OPERATOR_PATH "${CIRCLE_OPERATOR_PATH}/circle-operator")
+
+nnas_find_package(GTest REQUIRED)
+
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+
+GTest_AddTest(circle-operator-test ${TESTS})
+
+set_tests_properties(circle-operator-test
+ PROPERTIES
+ ENVIRONMENT "ARTIFACTS_PATH=${ARTIFACTS_PATH};CIRCLE_OPERATOR_PATH=${CIRCLE_OPERATOR_PATH}"
+ )
diff --git a/compiler/circle-operator-test/README.md b/compiler/circle-operator-test/README.md
new file mode 100644
index 000000000..d07c64d2e
--- /dev/null
+++ b/compiler/circle-operator-test/README.md
@@ -0,0 +1,7 @@
+# circle-operator-test
+
+_circle-operator-test_ verifies that the circle-operator tool works as expected.
+
+Current tests include
+- input arguments test is working as expected
+- output of this tool is as expected
diff --git a/compiler/circle-operator-test/requires.cmake b/compiler/circle-operator-test/requires.cmake
new file mode 100644
index 000000000..8ad3b8a64
--- /dev/null
+++ b/compiler/circle-operator-test/requires.cmake
@@ -0,0 +1,2 @@
+require("circle-operator")
+require("common-artifacts")
diff --git a/compiler/circle-operator-test/src/circle-operator.test.cpp b/compiler/circle-operator-test/src/circle-operator.test.cpp
new file mode 100644
index 000000000..29c6f3792
--- /dev/null
+++ b/compiler/circle-operator-test/src/circle-operator.test.cpp
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <cstdlib>
+#include <fstream>
+#include <vector>
+
+class cirlce_operator_test : public ::testing::Test
+{
+protected:
+ bool initialize(void);
+ bool run(const std::string &command);
+
+protected:
+ bool load(const std::string &file);
+
+protected:
+ std::string _artifacts_path;
+ std::string _circle_operator_path;
+ std::string _result;
+};
+
+bool cirlce_operator_test::initialize(void)
+{
+ char *path = std::getenv("ARTIFACTS_PATH");
+ if (path == nullptr)
+ {
+ std::cerr << "ARTIFACTS_PATH not found" << std::endl;
+ return false;
+ }
+ _artifacts_path = path;
+
+ path = std::getenv("CIRCLE_OPERATOR_PATH");
+ if (path == nullptr)
+ {
+    std::cerr << "CIRCLE_OPERATOR_PATH not found" << std::endl;
+ return false;
+ }
+ _circle_operator_path = path;
+
+ return true;
+}
+
+bool cirlce_operator_test::run(const std::string &command)
+{
+ std::vector<char> buffer(260);
+ std::string result = "";
+ std::string cmd_err = command + " 2>&1";
+ FILE *pipe = popen(cmd_err.c_str(), "r");
+ if (!pipe)
+ {
+ return false;
+ }
+ try
+ {
+ while (fgets(&buffer[0], buffer.size(), pipe) != NULL)
+ {
+ result += &buffer[0];
+ }
+ }
+ catch (...)
+ {
+ pclose(pipe);
+ return false;
+ }
+ pclose(pipe);
+ _result = result;
+
+ std::cout << _result << std::endl;
+
+ return true;
+}
+
+bool cirlce_operator_test::load(const std::string &file)
+{
+ std::ifstream tmp(file.c_str());
+ if (tmp.fail())
+ return false;
+
+ std::stringstream buffer;
+ buffer << tmp.rdbuf();
+ _result = buffer.str();
+ return true;
+}
+
+TEST_F(cirlce_operator_test, valid_names)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string model = _artifacts_path + "/Add_000.circle";
+ std::string command = _circle_operator_path + " --name " + model;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("ofm");
+ ASSERT_NE(std::string::npos, pos);
+}
+
+TEST_F(cirlce_operator_test, valid_codes)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string model = _artifacts_path + "/Add_000.circle";
+ std::string command = _circle_operator_path + " --code " + model;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("ADD");
+ ASSERT_NE(std::string::npos, pos);
+}
+
+TEST_F(cirlce_operator_test, invalid_option_NEG)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string model = _artifacts_path + "/Add_000.circle";
+ std::string command = _circle_operator_path + " --opname " + model;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("Invalid argument");
+ ASSERT_NE(std::string::npos, pos);
+}
+
+TEST_F(cirlce_operator_test, check_code_name)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string model = _artifacts_path + "/Add_000.circle";
+ std::string command = _circle_operator_path + " --code --name " + model;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("ofm");
+ ASSERT_NE(std::string::npos, pos);
+ const auto pos2 = _result.find("ADD");
+ ASSERT_NE(std::string::npos, pos2);
+}
+
+TEST_F(cirlce_operator_test, nonexist_file_NEG)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string model = _artifacts_path + "/non_exist_file.foo";
+ std::string command = _circle_operator_path + " --name " + model;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("ERROR");
+ ASSERT_NE(std::string::npos, pos);
+}
+
+TEST_F(cirlce_operator_test, invalid_file_NEG)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string model = _artifacts_path + "/Add_000.recipe";
+ std::string command = _circle_operator_path + " --name " + model;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("ERROR");
+ ASSERT_NE(std::string::npos, pos);
+}
+
+TEST_F(cirlce_operator_test, output_file)
+{
+ if (!initialize())
+ {
+ FAIL();
+ return;
+ }
+
+ std::string fileName("/tmp/a.txt");
+ std::remove(fileName.c_str());
+ std::string model = _artifacts_path + "/Add_000.circle";
+ std::string command = _circle_operator_path + " --code --output_path " + fileName + " " + model;
+ if (!run(command))
+ {
+ FAIL();
+ return;
+ }
+ if (!load(fileName))
+ {
+ FAIL();
+ return;
+ }
+
+ const auto pos = _result.find("ADD");
+ ASSERT_NE(std::string::npos, pos);
+}
diff --git a/compiler/circle-operator/CMakeLists.txt b/compiler/circle-operator/CMakeLists.txt
new file mode 100644
index 000000000..6817a8618
--- /dev/null
+++ b/compiler/circle-operator/CMakeLists.txt
@@ -0,0 +1,17 @@
+if(NOT TARGET mio_circle04)
+ return()
+endif(NOT TARGET mio_circle04)
+
+set(DRIVER "driver/Driver.cpp")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_executable(circle-operator ${DRIVER} ${SOURCES})
+target_include_directories(circle-operator PRIVATE src)
+target_link_libraries(circle-operator arser)
+target_link_libraries(circle-operator foder)
+target_link_libraries(circle-operator mio_circle04)
+target_link_libraries(circle-operator mio_circle04_helper)
+target_link_libraries(circle-operator safemain)
+
+install(TARGETS circle-operator DESTINATION bin)
diff --git a/compiler/circle-operator/README.md b/compiler/circle-operator/README.md
new file mode 100644
index 000000000..86a923f05
--- /dev/null
+++ b/compiler/circle-operator/README.md
@@ -0,0 +1,70 @@
+# circle-operator
+
+_circle-operator_ allows users to retrieve operators information from a Circle model file
+
+NOTE: this tool is primarily for ONE-vscode, where PartEditor needs the names and codes
+of the operators.
+
+## Information with operators
+
+Operators with `--name`
+- show operator names one line at a time in execution order
+
+Example
+```
+$ circle-operator --name model.circle
+```
+
+Result
+```
+conv1_pad/Pad
+conv1_conv/BiasAdd
+pool1_pad/Pad
+```
+
+Operators codes with `--code`
+- show operator codes one line at a time in execution order
+
+Example
+```
+$ circle-operator --code model.circle
+```
+
+Result
+```
+PAD
+CONV_2D
+PAD
+```
+
+Operators with both `--code` and `--name`
+- show both the operator code and name, separated with `,`, one line at a time in execution order
+
+Example
+```
+$ circle-operator --code --name model.circle
+```
+
+Result
+```
+PAD,conv1_pad/Pad
+CONV_2D,conv1_conv/BiasAdd
+PAD,pool1_pad/Pad
+```
+
+## Save to file
+
+Use `--output_path` to save results to a file.
+
+Example
+```
+$ circle-operator --name --output_path /tmp/result model.circle
+```
+
+Result
+```
+$ cat /tmp/result
+conv1_pad/Pad
+conv1_conv/BiasAdd
+pool1_pad/Pad
+```
diff --git a/compiler/circle-operator/driver/Driver.cpp b/compiler/circle-operator/driver/Driver.cpp
new file mode 100644
index 000000000..f5fd8073c
--- /dev/null
+++ b/compiler/circle-operator/driver/Driver.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dump.h"
+
+#include <arser/arser.h>
+#include <foder/FileLoader.h>
+#include <fstream>
+
+#include <functional>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <vector>
+#include <string>
+
+#include <signal.h>
+
+void handle_segfault(int signal, siginfo_t *si, void *arg)
+{
+ std::cerr << "ERROR: Failed to load file" << std::endl;
+ exit(255);
+}
+
+int entry(int argc, char **argv)
+{
+ // TODO add option to dump for all sub-graphs
+ arser::Arser arser{
+ "circle-operator allows users to retrieve operator information from a Circle model file"};
+ arser.add_argument("--name").nargs(0).help("Dump operators name in circle file");
+ arser.add_argument("--code").nargs(0).help("Dump operators code in circle file");
+ arser.add_argument("--output_path").help("Save output to file (default output is console)");
+ arser.add_argument("circle").help("Circle file to dump");
+
+ try
+ {
+ arser.parse(argc, argv);
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cerr << err.what() << std::endl;
+ std::cerr << arser;
+ return 255;
+ }
+
+ cirops::DumpOption option;
+ option.names = arser["--name"];
+ option.codes = arser["--code"];
+
+ std::ofstream oFstream;
+ std::ostream *oStream = &std::cout;
+ if (arser["--output_path"])
+ {
+ auto output_path = arser.get<std::string>("--output_path");
+ oFstream.open(output_path, std::ofstream::out | std::ofstream::trunc);
+ if (oFstream.fail())
+ {
+ std::cerr << "ERROR: Failed to create output to file " << output_path << std::endl;
+ return 255;
+ }
+ oStream = &oFstream;
+ }
+
+ // hook segment fault
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(struct sigaction));
+ sigemptyset(&sa.sa_mask);
+ sa.sa_sigaction = handle_segfault;
+ sa.sa_flags = SA_SIGINFO;
+ sigaction(SIGSEGV, &sa, NULL);
+
+ std::string modelFile = arser.get<std::string>("circle");
+ // Load Circle model from a circle file
+ try
+ {
+ foder::FileLoader fileLoader{modelFile};
+ std::vector<char> modelData = fileLoader.load();
+ const circle::Model *circleModel = circle::GetModel(modelData.data());
+ if (circleModel == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load circle '" << modelFile << "'" << std::endl;
+ return 255;
+ }
+ cirops::DumpOperators dump;
+ dump.run(*oStream, circleModel, option);
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cerr << "ERROR: " << err.what() << std::endl;
+ return 255;
+ }
+
+ if (oFstream.is_open())
+ {
+ oFstream.close();
+ }
+
+ return 0;
+}
diff --git a/compiler/circle-operator/requires.cmake b/compiler/circle-operator/requires.cmake
new file mode 100644
index 000000000..183dfe227
--- /dev/null
+++ b/compiler/circle-operator/requires.cmake
@@ -0,0 +1,4 @@
+require("arser")
+require("foder")
+require("mio-circle04")
+require("safemain")
diff --git a/compiler/circle-operator/src/Dump.cpp b/compiler/circle-operator/src/Dump.cpp
new file mode 100644
index 000000000..36bfe8632
--- /dev/null
+++ b/compiler/circle-operator/src/Dump.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dump.h"
+
+#include <mio_circle/Helper.h>
+#include <mio_circle/Reader.h>
+
+#include <ostream>
+
+namespace
+{
+
+void dump_ops(std::ostream &os, mio::circle::Reader &reader, const cirops::DumpOption &option)
+{
+ auto ops = reader.operators();
+ for (uint32_t i = 0; i < ops->Length(); ++i)
+ {
+ const auto op = ops->Get(i);
+ const auto op_name = reader.opcode_name(op);
+
+ if (option.all_graphs)
+ {
+ // NOTE all_graphs is false for now
+ // TODO check using '$' as split key
+ os << i << "$";
+ }
+
+ if (option.codes)
+ {
+ const auto op_name = reader.opcode_name(op);
+ os << op_name;
+ }
+ if (option.names)
+ {
+ // TODO multiple outputs?
+ const auto tensors = reader.tensors();
+ const auto output_tensors = reader.outputs(op);
+ const auto output = output_tensors.at(0);
+ const auto tensor = tensors->Get(output);
+ const std::string name = mio::circle::tensor_name(tensor);
+ if (option.codes)
+ {
+ os << ",";
+ }
+ os << name;
+ }
+ os << std::endl;
+ }
+}
+
+} // namespace
+
+namespace cirops
+{
+
+void DumpOperators::run(std::ostream &os, const circle::Model *model, const DumpOption &option)
+{
+ mio::circle::Reader reader(model);
+
+ const uint32_t subgraph_size = reader.num_subgraph();
+ for (uint32_t g = 0; g < subgraph_size; g++)
+ {
+ reader.select_subgraph(g);
+ dump_ops(os, reader, option);
+
+ if (!option.all_graphs)
+ break;
+ }
+}
+
+} // namespace cirops
diff --git a/compiler/circle-operator/src/Dump.h b/compiler/circle-operator/src/Dump.h
new file mode 100644
index 000000000..aa1d1be49
--- /dev/null
+++ b/compiler/circle-operator/src/Dump.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DUMP_H__
+#define __DUMP_H__
+
+#include <mio/circle/schema_generated.h>
+
+#include <ostream>
+
+namespace cirops
+{
+
+struct DumpOption
+{
+ bool names = false;
+ bool codes = false;
+ bool all_graphs = false;
+};
+
+class DumpOperators
+{
+public:
+ DumpOperators() = default;
+
+public:
+ void run(std::ostream &os, const circle::Model *model, const DumpOption &option);
+};
+
+} // namespace cirops
+
+#endif // __DUMP_H__
diff --git a/compiler/circle-opselector/driver/Driver.cpp b/compiler/circle-opselector/driver/Driver.cpp
index a1ace4f58..4b39a6ddb 100644
--- a/compiler/circle-opselector/driver/Driver.cpp
+++ b/compiler/circle-opselector/driver/Driver.cpp
@@ -159,26 +159,16 @@ int entry(int argc, char **argv)
arser::Arser arser("circle-opselector provides selecting operations in circle model");
- arser.add_argument("--version")
- .nargs(0)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
+ arser::Helper::add_version(arser, print_version);
// TODO Add new options!
- arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
- arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
+ arser.add_argument("input").help("Input circle model");
+ arser.add_argument("output").help("Output circle model");
// select option
- arser.add_argument("--by_id")
- .nargs(1)
- .type(arser::DataType::STR)
- .help("Input operation id to select nodes.");
- arser.add_argument("--by_name")
- .nargs(1)
- .type(arser::DataType::STR)
- .help("Input operation name to select nodes.");
+ arser.add_argument("--by_id").help("Input operation id to select nodes.");
+ arser.add_argument("--by_name").help("Input operation name to select nodes.");
try
{
diff --git a/compiler/circle-part-value-test/CMakeLists.txt b/compiler/circle-part-value-test/CMakeLists.txt
index 0657607d2..ffe1b8909 100644
--- a/compiler/circle-part-value-test/CMakeLists.txt
+++ b/compiler/circle-part-value-test/CMakeLists.txt
@@ -82,7 +82,8 @@ foreach(IDX RANGE ${RECIPE_LENGTH_M1})
# Run partitioner
add_custom_command(OUTPUT ${PARTITIONER_CONN_JSON}
- COMMAND circle-partitioner "${PART_FILE}" "${PARTITION_NAME}.circle" "${PARTITIONER_OUTPUT_PATH}"
+ COMMAND circle-partitioner "--part_file" "${PART_FILE}" "--input_file"
+ "${PARTITION_NAME}.circle" "--work_path" "${PARTITIONER_OUTPUT_PATH}"
DEPENDS circle-partitioner ${PART_DST_PATH} ${CIRCLE_DST_PATH}
COMMENT "Parition ${RECIPE_NAME}.circle with ${PART_FILE}"
)
diff --git a/compiler/circle-partitioner-test/CMakeLists.txt b/compiler/circle-partitioner-test/CMakeLists.txt
index e29a66b41..7b26b3ba7 100644
--- a/compiler/circle-partitioner-test/CMakeLists.txt
+++ b/compiler/circle-partitioner-test/CMakeLists.txt
@@ -57,7 +57,8 @@ foreach(IDX RANGE ${RECIPE_LENGTH_M1})
# Run partitioner
set(PART_CONN_JSON "${PART_OUT_PATH}/${PART_NAME}.conn.json")
add_custom_command(OUTPUT ${PART_CONN_JSON}
- COMMAND circle-partitioner "${PART_FILE}" "${PART_NAME}.circle" "${PART_OUT_PATH}"
+ COMMAND circle-partitioner "--part_file" "${PART_FILE}" "--input_file"
+ "${PART_NAME}.circle" "--work_path" "${PART_OUT_PATH}"
DEPENDS circle-partitioner ${CIRCLE_DST_PATH} ${PART_DST_PATH}
COMMENT "Parition ${RECIPE_NAME}.circle with ${PART_FILE}"
)
diff --git a/compiler/circle-partitioner/CMakeLists.txt b/compiler/circle-partitioner/CMakeLists.txt
index 9b8f5afae..abc5d93fb 100644
--- a/compiler/circle-partitioner/CMakeLists.txt
+++ b/compiler/circle-partitioner/CMakeLists.txt
@@ -1,7 +1,6 @@
file(GLOB_RECURSE SOURCES "src/*.cpp")
add_executable(circle-partitioner "${SOURCES}")
-target_link_libraries(circle-partitioner foder)
target_link_libraries(circle-partitioner crew)
target_link_libraries(circle-partitioner safemain)
target_link_libraries(circle-partitioner luci_lang)
@@ -17,22 +16,3 @@ target_link_libraries(circle-partitioner vconone)
target_link_libraries(circle-partitioner nncc_common)
install(TARGETS circle-partitioner DESTINATION bin)
-
-# TODO remove circle_partitioner
-add_executable(circle_partitioner "${SOURCES}")
-target_link_libraries(circle_partitioner foder)
-target_link_libraries(circle_partitioner crew)
-target_link_libraries(circle_partitioner safemain)
-target_link_libraries(circle_partitioner luci_lang)
-target_link_libraries(circle_partitioner luci_log)
-target_link_libraries(circle_partitioner luci_import)
-target_link_libraries(circle_partitioner luci_service)
-target_link_libraries(circle_partitioner luci_pass)
-target_link_libraries(circle_partitioner luci_export)
-target_link_libraries(circle_partitioner luci_partition)
-target_link_libraries(circle_partitioner arser)
-target_link_libraries(circle_partitioner pepper_csv2vec)
-target_link_libraries(circle_partitioner vconone)
-target_link_libraries(circle_partitioner nncc_common)
-
-install(TARGETS circle_partitioner DESTINATION bin)
diff --git a/compiler/circle-partitioner/README.md b/compiler/circle-partitioner/README.md
index 2e0a98638..760cf28d1 100644
--- a/compiler/circle-partitioner/README.md
+++ b/compiler/circle-partitioner/README.md
@@ -4,10 +4,10 @@ _circle-partitioner_ provides model partitioning of circle model to two or more
## How circle-partitioner work
-_circle-partitioner_ requires 3 positional arguments
-- first: `partition` file
-- second: `input` circle model file
-- third: `work` folder
+_circle-partitioner_ requires 3 arguments for input files
+- `--part_file`: `partition` file, use extension `.part`
+- `--input_file`: `input` circle model file
+- `--work_path`: `work` path where input files reside. This is optional and defaults to CWD if omitted
And options to override `partition` file as a helper to try out without editing `partition` file.
- `--backends`: override `backends` of `[partition]` section
@@ -20,7 +20,7 @@ are read from `work` folder.
Outputs are (1) one or more partitioned circle models and (2) connection file that gives how
the partitioned models should be connected to act like the source `input` model.
-Why does input files be placed in `work` folder too?
+Why do input files need to be placed in `work` path too?
- this is still work in progress condition
- use cases are still ambiguous
- original `input` model file can be used by the backend, so `.conn` file links it as `source`
@@ -94,7 +94,8 @@ Net_InstanceNorm_003/
Command example
```
-./circle-partitioner Net_InstanceNorm_003.part Net_InstanceNorm_003.circle Net_InstanceNorm_003
+./circle-partitioner --part_file Net_InstanceNorm_003.part \
+--input_file Net_InstanceNorm_003.circle --work_path Net_InstanceNorm_003
```
Result of _circle-partitioner_
@@ -171,11 +172,11 @@ Consider partitioning with backends of OneRT
Let's try with this command:
```
-circle_partitioner \
- --partition Net_InstanceNorm_003.part \
- --backends cpu,acl_cl \
- --default cpu \
- Net_InstanceNorm_003.circle Net_InstanceNorm_003
+circle-partitioner \
+ --backends cpu,acl_cl --default cpu \
+ --part_file Net_InstanceNorm_003.part \
+ --input_file Net_InstanceNorm_003.circle \
+ --work_path Net_InstanceNorm_003
```
where `Net_InstanceNorm_003.part` is like this for initial design
diff --git a/compiler/circle-partitioner/requires.cmake b/compiler/circle-partitioner/requires.cmake
index 690d9531c..82d9c2b0f 100644
--- a/compiler/circle-partitioner/requires.cmake
+++ b/compiler/circle-partitioner/requires.cmake
@@ -1,4 +1,3 @@
-require("foder")
require("crew")
require("pepper-csv2vec")
require("safemain")
diff --git a/compiler/circle-partitioner/src/CirclePartitioner.cpp b/compiler/circle-partitioner/src/CirclePartitioner.cpp
index 0151e92d3..5cecb9ae0 100644
--- a/compiler/circle-partitioner/src/CirclePartitioner.cpp
+++ b/compiler/circle-partitioner/src/CirclePartitioner.cpp
@@ -18,9 +18,7 @@
#include "PartitionExport.h"
#include "HelperPath.h"
-#include <foder/FileLoader.h>
-
-#include <luci/Importer.h>
+#include <luci/ImporterEx.h>
#include <luci/Service/Validate.h>
#include <luci/CircleExporter.h>
#include <luci/CircleFileExpContract.h>
@@ -41,9 +39,9 @@ namespace
const char *opt_bks = "--backends";
const char *opt_def = "--default";
-const char *opt_part = "partition";
-const char *opt_input = "input";
-const char *opt_work = "work";
+const char *opt_part_file = "--part_file";
+const char *opt_input_file = "--input_file";
+const char *opt_work_path = "--work_path";
void print_version(void)
{
@@ -53,63 +51,25 @@ void print_version(void)
void build_arser(arser::Arser &arser)
{
- arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
-
- arser.add_argument(opt_bks)
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .help("Backends in CSV to use for partitioning");
-
- arser.add_argument(opt_def)
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .help("Default backend to assign");
-
- arser.add_argument(opt_part)
- .nargs(1)
- .type(arser::DataType::STR)
+ arser::Helper::add_version(arser, print_version);
+
+ arser.add_argument(opt_bks).help("Backends in CSV to use for partitioning");
+
+ arser.add_argument(opt_def).help("Default backend to assign");
+
+ arser.add_argument(opt_part_file)
+ .required(true)
.help("Partition file which provides backend to assign");
- arser.add_argument(opt_input)
- .nargs(1)
- .type(arser::DataType::STR)
- .help("Input circle model filename");
- arser.add_argument(opt_work)
- .nargs(1)
- .type(arser::DataType::STR)
+ arser.add_argument(opt_input_file).required(true).help("Input circle model filename");
+ arser.add_argument(opt_work_path)
.help("Work folder of partition, input files exist and output files are produced");
}
std::unique_ptr<luci::Module> load_model(const std::string &input_path)
{
- // Load model from the file
- foder::FileLoader file_loader{input_path};
- std::vector<char> model_data = file_loader.load();
-
- // Verify flatbuffers
- flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
- if (!circle::VerifyModelBuffer(verifier))
- {
- std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
- return nullptr;
- }
-
- const circle::Model *circle_model = circle::GetModel(model_data.data());
- if (circle_model == nullptr)
- {
- std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
- return nullptr;
- }
-
// Import from input Circle file
- luci::Importer importer;
- return importer.importModule(circle_model);
+ luci::ImporterEx importerex;
+ return importerex.importVerifyModule(input_path);
}
} // namespace
@@ -133,9 +93,14 @@ int entry(int argc, char **argv)
return EXIT_FAILURE;
}
- std::string partition_file = arser.get<std::string>(opt_part);
- std::string input_file = arser.get<std::string>(opt_input);
- std::string work_folder = arser.get<std::string>(opt_work);
+ std::string partition_file = arser.get<std::string>(opt_part_file);
+ std::string input_file = arser.get<std::string>(opt_input_file);
+ std::string work_folder = ".";
+
+ if (arser[opt_work_path])
+ {
+ work_folder = arser.get<std::string>(opt_work_path);
+ }
std::string partition_path = work_folder + "/" + partition_file;
std::string input_path = work_folder + "/" + input_file;
diff --git a/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt b/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt
index 5ec8b6ee5..a3a2902d9 100644
--- a/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt
+++ b/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt
@@ -18,7 +18,7 @@ unset(TEST_NAMES)
get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
set(options USE_QCONFIG)
-set(oneValueArgs DTYPE GRANULARITY)
+set(oneValueArgs DTYPE GRANULARITY INPUT_DTYPE OUTPUT_DTYPE)
set(multiValueArgs "")
macro(Add RECIPE)
@@ -29,6 +29,16 @@ macro(Add RECIPE)
set(QCONFIG_OPT "--config" "${ARTIFACTS_BIN_PATH}/${RECIPE}.qconf.json")
endif()
+ set(INPUT_DTYPE_OPT "")
+ if(ARG_INPUT_DTYPE)
+ set(INPUT_DTYPE_OPT "--input_type" "${ARG_INPUT_DTYPE}")
+ endif()
+
+ set(OUTPUT_DTYPE_OPT "")
+ if(ARG_OUTPUT_DTYPE)
+ set(OUTPUT_DTYPE_OPT "--output_type" "${ARG_OUTPUT_DTYPE}")
+ endif()
+
set(CIRCLE_PATH "${ARTIFACTS_BIN_PATH}/${RECIPE}.circle")
set(FAKE_QUANT_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.fq.circle")
set(RECORDED_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.recorded.circle")
@@ -38,7 +48,10 @@ macro(Add RECIPE)
add_custom_command(OUTPUT ${QUANT_CIRCLE_PATH}
COMMAND $<TARGET_FILE:circle-quantizer> --quantize_dequantize_weights float32 ${ARG_DTYPE} ${ARG_GRANULARITY} ${QCONFIG_OPT} ${CIRCLE_PATH} ${FAKE_QUANT_CIRCLE_PATH}
COMMAND $<TARGET_FILE:record-minmax> --input_model ${FAKE_QUANT_CIRCLE_PATH} --output_model ${RECORDED_CIRCLE_PATH}
- COMMAND $<TARGET_FILE:circle-quantizer> --quantize_with_minmax float32 ${ARG_DTYPE} ${ARG_GRANULARITY} ${QCONFIG_OPT} ${RECORDED_CIRCLE_PATH} ${QUANT_CIRCLE_PATH}
+ COMMAND $<TARGET_FILE:circle-quantizer>
+ --quantize_with_minmax float32 ${ARG_DTYPE} ${ARG_GRANULARITY}
+ ${QCONFIG_OPT} ${RECORDED_CIRCLE_PATH} ${QUANT_CIRCLE_PATH}
+ ${INPUT_DTYPE_OPT} ${OUTPUT_DTYPE_OPT}
DEPENDS
circle-quantizer
record-minmax
diff --git a/compiler/circle-quantizer-dredd-recipe-test/test.lst b/compiler/circle-quantizer-dredd-recipe-test/test.lst
index 188103016..58f89c767 100644
--- a/compiler/circle-quantizer-dredd-recipe-test/test.lst
+++ b/compiler/circle-quantizer-dredd-recipe-test/test.lst
@@ -6,10 +6,75 @@
## TFLITE RECIPE
+# MPQ Test (default: u8, target: s16)
+Add(Quant_Add_001 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_AveragePool2D_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_BatchMatMul_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Concatenation_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Conv_003 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_DepthwiseConv2D_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_FullyConnected_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_LeakyRelu_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Logistic_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_MaxPool2D_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Mean_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Mul_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Neg_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Pad_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_PRelu_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ReLU_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ReLU6_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Reshape_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ResizeBilinear_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ResizeNearestNeighbor_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Slice_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Softmax_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Tanh_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Transpose_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_TransposeConv_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+
+# MPQ Test (default: s16, target: u8)
+Add(Quant_Add_002 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_AveragePool2D_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_BatchMatMul_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Concatenation_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Conv_004 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_DepthwiseConv2D_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_FullyConnected_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_LeakyRelu_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Logistic_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_MaxPool2D_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Mean_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Mul_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Neg_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Pad_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_PRelu_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ReLU_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ReLU6_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Reshape_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ResizeBilinear_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_ResizeNearestNeighbor_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Slice_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Softmax_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Tanh_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Transpose_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+Add(Quant_TransposeConv_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
+
Add(Quant_Conv_Mul_Add_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
Add(Quant_Conv_Mul_Add_001 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
Add(Quant_Conv_Mul_Add_002 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
Add(Quant_Split_Add_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
Add(Quant_Split_Add_001 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Conv_000 DTYPE uint8 GRANULARITY channel INPUT_DTYPE float32)
+Add(Quant_Conv_001 DTYPE uint8 GRANULARITY channel OUTPUT_DTYPE float32)
+Add(Quant_Conv_002 DTYPE uint8 GRANULARITY channel INPUT_DTYPE float32 OUTPUT_DTYPE float32)
AddFakeQuant(Quant_Add_000)
+
+## CIRCLE RECIPE
+
+# MPQ Test (default: u8, target: s16)
+Add(Quant_InstanceNorm_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+
+# MPQ Test (default: s16, target: u8)
+Add(Quant_InstanceNorm_001 DTYPE int16 GRANULARITY channel USE_QCONFIG)
diff --git a/compiler/circle-quantizer/CMakeLists.txt b/compiler/circle-quantizer/CMakeLists.txt
index 14e00972b..16e41a327 100644
--- a/compiler/circle-quantizer/CMakeLists.txt
+++ b/compiler/circle-quantizer/CMakeLists.txt
@@ -10,7 +10,6 @@ add_executable(circle-quantizer "${SOURCES}")
target_include_directories(circle-quantizer PRIVATE ${Jsoncpp_INCLUDE_DIRS})
target_link_libraries(circle-quantizer ${Jsoncpp_STATIC_LIB})
-target_link_libraries(circle-quantizer foder)
target_link_libraries(circle-quantizer safemain)
target_link_libraries(circle-quantizer oops)
target_link_libraries(circle-quantizer loco)
diff --git a/compiler/circle-quantizer/requires.cmake b/compiler/circle-quantizer/requires.cmake
index c21e28e8d..4fcee1873 100644
--- a/compiler/circle-quantizer/requires.cmake
+++ b/compiler/circle-quantizer/requires.cmake
@@ -1,4 +1,3 @@
-require("foder")
require("loco")
require("locop")
require("safemain")
diff --git a/compiler/circle-quantizer/src/CircleQuantizer.cpp b/compiler/circle-quantizer/src/CircleQuantizer.cpp
index e0c85cb6e..f1e31ed8d 100644
--- a/compiler/circle-quantizer/src/CircleQuantizer.cpp
+++ b/compiler/circle-quantizer/src/CircleQuantizer.cpp
@@ -14,9 +14,7 @@
* limitations under the License.
*/
-#include <foder/FileLoader.h>
-
-#include <luci/Importer.h>
+#include <luci/ImporterEx.h>
#include <luci/CircleQuantizer.h>
#include <luci/Service/Validate.h>
#include <luci/CircleExporter.h>
@@ -59,13 +57,31 @@ std::vector<std::shared_ptr<LayerParam>> read_layer_params(std::string &filename
std::vector<std::shared_ptr<LayerParam>> p;
for (auto layer : layers)
{
- auto l = std::make_shared<LayerParam>();
+ if (layer.isMember("name"))
{
- l->name = layer["name"].asString();
- l->dtype = layer["dtype"].asString();
- l->granularity = layer["granularity"].asString();
+ auto l = std::make_shared<LayerParam>();
+ {
+ l->name = layer["name"].asString();
+ l->dtype = layer["dtype"].asString();
+ l->granularity = layer["granularity"].asString();
+ }
+ p.emplace_back(l);
+ }
+
+ // Multiple names with the same dtype & granularity
+ if (layer.isMember("names"))
+ {
+ for (auto name : layer["names"])
+ {
+ auto l = std::make_shared<LayerParam>();
+ {
+ l->name = name.asString();
+ l->dtype = layer["dtype"].asString();
+ l->granularity = layer["granularity"].asString();
+ }
+ p.emplace_back(l);
+ }
}
- p.emplace_back(l);
}
return p;
@@ -109,23 +125,12 @@ int entry(int argc, char **argv)
arser::Arser arser("circle-quantizer provides circle model quantization");
- arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
-
- arser.add_argument("-V", "--verbose")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("output additional information to stdout or stderr");
+ arser::Helper::add_version(arser, print_version);
+ arser::Helper::add_verbose(arser);
arser.add_argument(qdqw)
.nargs(3)
.type(arser::DataType::STR_VEC)
- .required(false)
.help("Quantize-dequantize weight values required action before quantization. "
"Three arguments required: input_model_dtype(float32) "
"output_model_dtype(uint8) granularity(layer, channel)");
@@ -133,28 +138,24 @@ int entry(int argc, char **argv)
arser.add_argument(qwmm)
.nargs(3)
.type(arser::DataType::STR_VEC)
- .required(false)
.help("Quantize with min/max values. "
"Three arguments required: input_model_dtype(float32) "
"output_model_dtype(uint8) granularity(layer, channel)");
arser.add_argument(tf_maxpool)
.nargs(0)
- .required(false)
.default_value(false)
.help("Force MaxPool Op to have the same input/output quantparams. NOTE: This feature can "
"degrade accuracy of some models");
arser.add_argument(fake_quant)
.nargs(0)
- .required(false)
.help("Convert a quantized model to a fake-quantized model. NOTE: This feature will "
"generate an fp32 model.");
arser.add_argument(rq)
.nargs(2)
.type(arser::DataType::STR_VEC)
- .required(false)
.help("Requantize a quantized model. "
"Two arguments required: input_model_dtype(int8) "
"output_model_dtype(uint8)");
@@ -162,7 +163,6 @@ int entry(int argc, char **argv)
arser.add_argument(fq)
.nargs(3)
.type(arser::DataType::STR_VEC)
- .required(false)
.accumulated(true)
.help("Write quantization parameters to the specified tensor. "
"Three arguments required: tensor_name(string), "
@@ -171,32 +171,21 @@ int entry(int argc, char **argv)
arser.add_argument(cq)
.nargs(2)
.type(arser::DataType::STR_VEC)
- .required(false)
.accumulated(true)
.help("Copy quantization parameter from a tensor to another tensor."
"Two arguments required: source_tensor_name(string), "
"destination_tensor_name(string)");
arser.add_argument("--input_type")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .help("Input type of quantized model (uint8 or int16)");
+ .help("Input type of quantized model (uint8, int16, or float32)");
arser.add_argument("--output_type")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .help("Output type of quantized model (uint8 or int16)");
+ .help("Output type of quantized model (uint8, int16, or float32)");
- arser.add_argument(cfg)
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .help("Path to the quantization configuration file");
+ arser.add_argument(cfg).help("Path to the quantization configuration file");
- arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
- arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
+ arser.add_argument("input").help("Input circle model");
+ arser.add_argument("output").help("Output circle model");
arser.add_argument(gpd).nargs(0).required(false).default_value(false).help(
"This will turn on profiling data generation.");
@@ -384,27 +373,10 @@ int entry(int argc, char **argv)
settings->set(luci::UserSettings::Key::ProfilingDataGen, true);
// Load model from the file
- foder::FileLoader file_loader{input_path};
- std::vector<char> model_data = file_loader.load();
-
- // Verify flatbuffers
- flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
- if (!circle::VerifyModelBuffer(verifier))
- {
- std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
- return EXIT_FAILURE;
- }
-
- const circle::Model *circle_model = circle::GetModel(model_data.data());
- if (circle_model == nullptr)
- {
- std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
+ luci::ImporterEx importerex;
+ auto module = importerex.importVerifyModule(input_path);
+ if (module.get() == nullptr)
return EXIT_FAILURE;
- }
-
- // Import from input Circle file
- luci::Importer importer;
- auto module = importer.importModule(circle_model);
for (size_t idx = 0; idx < module->size(); ++idx)
{
diff --git a/compiler/circle-tensordump/driver/Driver.cpp b/compiler/circle-tensordump/driver/Driver.cpp
index 70f3c8d84..c32dc3f5a 100644
--- a/compiler/circle-tensordump/driver/Driver.cpp
+++ b/compiler/circle-tensordump/driver/Driver.cpp
@@ -31,11 +31,9 @@ int entry(int argc, char **argv)
arser::Arser arser{
"circle-tensordump allows users to retrieve tensor information from a Circle model file"};
- arser.add_argument("circle").nargs(1).type(arser::DataType::STR).help("Circle file path to dump");
+ arser.add_argument("circle").help("Circle file path to dump");
arser.add_argument("--tensors").nargs(0).help("Dump to console");
arser.add_argument("--tensors_to_hdf5")
- .nargs(1)
- .type(arser::DataType::STR)
.help("Dump to hdf5 file. Specify hdf5 file path to be dumped");
try
diff --git a/compiler/circle-tensordump/src/Dump.cpp b/compiler/circle-tensordump/src/Dump.cpp
index e477a7417..49afa73df 100644
--- a/compiler/circle-tensordump/src/Dump.cpp
+++ b/compiler/circle-tensordump/src/Dump.cpp
@@ -15,7 +15,8 @@
*/
#include "Dump.h"
-#include "Reader.h"
+
+#include <mio_circle/Reader.h>
#include <H5Cpp.h>
@@ -102,7 +103,7 @@ namespace circletensordump
void DumpTensors::run(std::ostream &os, const circle::Model *model, const std::string &)
{
- circletensordump::Reader reader(model);
+ mio::circle::Reader reader(model);
uint32_t num_subgraph = reader.num_subgraph();
auto buffers = reader.buffers();
@@ -296,7 +297,7 @@ void DumpTensorsToHdf5::run(std::ostream &os, const circle::Model *model,
const std::string &output_path)
{
// loads a circle model
- circletensordump::Reader reader(model);
+ mio::circle::Reader reader(model);
uint32_t num_subgraph = reader.num_subgraph();
// create a hdf5 file
diff --git a/compiler/circle-tensordump/src/Reader.cpp b/compiler/circle-tensordump/src/Reader.cpp
deleted file mode 100644
index 47b876054..000000000
--- a/compiler/circle-tensordump/src/Reader.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Reader.h"
-
-#include <mio_circle/Helper.h>
-
-#include <sstream>
-#include <string>
-
-namespace circletensordump
-{
-
-Reader::Reader(const circle::Model *model)
-{
- _subgraphs = model->subgraphs();
- _buffers = model->buffers();
-
- auto opcodes = model->operator_codes();
- for (const ::circle::OperatorCode *opcode : *opcodes)
- {
- _op_codes.push_back(opcode);
- }
-}
-
-size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data)
-{
- if (buff_data != nullptr)
- {
- *buff_data = nullptr;
- }
-
- if (buf_idx == 0)
- return 0;
-
- if (auto *buffer = (*_buffers)[buf_idx])
- {
- if (auto *array = buffer->data())
- {
- if (size_t size = array->size())
- {
- if (buff_data != nullptr)
- {
- *buff_data = reinterpret_cast<const uint8_t *>(array->data());
- }
- return size;
- }
- }
- }
-
- return 0;
-}
-
-circle::BuiltinOperator Reader::builtin_code(const circle::Operator *op) const
-{
- uint32_t index = op->opcode_index();
- assert(index < _op_codes.size());
- const circle::OperatorCode *opcode = _op_codes.at(index);
-
- return mio::circle::builtin_code_neutral(opcode);
-}
-
-std::string Reader::opcode_name(const circle::Operator *op) const
-{
- uint32_t index = op->opcode_index();
- assert(index < _op_codes.size());
- const circle::OperatorCode *opcode = _op_codes.at(index);
-
- if (!mio::circle::is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid: " << index << ")";
- return oss.str();
- }
-
- return mio::circle::opcode_name(opcode);
-}
-
-bool Reader::select_subgraph(uint32_t sgindex)
-{
- _tensors = nullptr;
- _operators = nullptr;
-
- _inputs.clear();
- _outputs.clear();
-
- if (_subgraphs->Length() <= sgindex)
- {
- assert(false);
- return false;
- }
-
- const circle::SubGraph *subgraph = (*_subgraphs)[sgindex];
-
- _tensors = subgraph->tensors();
- _operators = subgraph->operators();
-
- _inputs = as_index_vector(subgraph->inputs());
- _outputs = as_index_vector(subgraph->outputs());
-
- return true;
-}
-
-} // namespace circletensordump
diff --git a/compiler/circle-tensordump/src/Reader.h b/compiler/circle-tensordump/src/Reader.h
deleted file mode 100644
index c868bc277..000000000
--- a/compiler/circle-tensordump/src/Reader.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CIRCLE_TENSORDUMP_READER_H__
-#define __CIRCLE_TENSORDUMP_READER_H__
-
-#include <mio/circle/schema_generated.h>
-
-#include <map>
-#include <string>
-#include <vector>
-
-namespace circletensordump
-{
-
-template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array)
-{
- std::vector<T> ret(flat_array->Length());
- for (uint32_t i = 0; i < flat_array->Length(); i++)
- {
- ret[i] = flat_array->Get(i);
- }
- return ret;
-}
-
-/**
- * @brief Loads Circle file and provides helpers to access attributes
- */
-class Reader
-{
-private:
- using CircleSubGraphs_t = flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>;
- using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>;
- using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>;
- using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>;
-
-public:
- Reader(const circle::Model *model);
-
- Reader() = delete;
-
-public:
- const std::vector<const circle::OperatorCode *> &opcodes() { return _op_codes; }
- const CircleBuffers_t *buffers() { return _buffers; }
- const CircleTensors_t *tensors() { return _tensors; }
- const CircleOperators_t *operators() { return _operators; }
- const std::vector<int32_t> &inputs() const { return _inputs; }
- const std::vector<int32_t> &outputs() const { return _outputs; }
-
- uint32_t num_subgraph() const { return _subgraphs->Length(); }
-
- size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data);
- circle::BuiltinOperator builtin_code(const circle::Operator *op) const;
- std::string opcode_name(const circle::Operator *op) const;
-
-public:
- bool select_subgraph(uint32_t subgraph);
-
-private:
- const CircleSubGraphs_t *_subgraphs{nullptr};
- const CircleBuffers_t *_buffers{nullptr};
- const CircleTensors_t *_tensors{nullptr};
- const CircleOperators_t *_operators{nullptr};
-
- std::vector<const circle::OperatorCode *> _op_codes;
- std::vector<int32_t> _inputs;
- std::vector<int32_t> _outputs;
-};
-
-} // namespace circletensordump
-
-#endif // __CIRCLE_TENSORDUMP_READER_H__
diff --git a/compiler/circle-verify/src/Driver.cpp b/compiler/circle-verify/src/Driver.cpp
index 7a44c65b9..c3a414701 100644
--- a/compiler/circle-verify/src/Driver.cpp
+++ b/compiler/circle-verify/src/Driver.cpp
@@ -25,7 +25,7 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("circle").type(arser::DataType::STR).help("Circle file path to verify");
+ arser.add_argument("circle").help("Circle file path to verify");
try
{
diff --git a/compiler/circle2circle-dredd-recipe-test/test.lst b/compiler/circle2circle-dredd-recipe-test/test.lst
index f41aac303..a6f2786d2 100644
--- a/compiler/circle2circle-dredd-recipe-test/test.lst
+++ b/compiler/circle2circle-dredd-recipe-test/test.lst
@@ -31,6 +31,8 @@ Add(Net_TConv_Add_002 PASS fuse_add_with_tconv)
Add(Net_TConv_BN_000 PASS fuse_batchnorm_with_tconv)
Add(Net_TConv_BN_001 PASS fuse_batchnorm_with_tconv)
Add(Net_TConv_BN_002 PASS fuse_batchnorm_with_tconv)
+Add(Net_TConv_BN_003 PASS fuse_batchnorm_with_tconv)
+Add(Net_TConv_BN_004 PASS fuse_batchnorm_with_tconv)
Add(Net_InstanceNorm_001 PASS fuse_instnorm)
Add(Net_InstanceNorm_003 PASS fuse_instnorm)
Add(Net_InstanceNorm_004 PASS fuse_instnorm)
@@ -46,6 +48,7 @@ Add(StridedSlice_003 PASS substitute_strided_slice_to_reshape)
Add(MaxPoolWithArgmax_000 PASS resolve_customop_max_pool_with_argmax)
Add(MaxPoolWithArgmax_001 PASS resolve_customop_max_pool_with_argmax)
Add(MaxPoolWithArgmax_002 PASS resolve_customop_max_pool_with_argmax)
+Add(FullyConnected_007 PASS replace_non_const_fc_with_batch_matmul)
## CIRCLE RECIPE
diff --git a/compiler/circle2circle/CMakeLists.txt b/compiler/circle2circle/CMakeLists.txt
index cd79967b7..dbe485b9f 100644
--- a/compiler/circle2circle/CMakeLists.txt
+++ b/compiler/circle2circle/CMakeLists.txt
@@ -4,7 +4,6 @@ list(REMOVE_ITEM SOURCES ${TESTS})
add_executable(circle2circle "${SOURCES}")
target_include_directories(circle2circle PRIVATE src)
-target_link_libraries(circle2circle foder)
target_link_libraries(circle2circle nncc_common)
target_link_libraries(circle2circle safemain)
target_link_libraries(circle2circle oops)
@@ -29,7 +28,6 @@ nnas_find_package(GTest REQUIRED)
GTest_AddTest(circle2circle_test ${TESTS} ${SOURCES})
target_include_directories(circle2circle_test PRIVATE src)
-target_link_libraries(circle2circle_test foder)
target_link_libraries(circle2circle_test nncc_common)
target_link_libraries(circle2circle_test oops)
target_link_libraries(circle2circle_test hermes)
diff --git a/compiler/circle2circle/requires.cmake b/compiler/circle2circle/requires.cmake
index b6c61198f..4e5ed0dd1 100644
--- a/compiler/circle2circle/requires.cmake
+++ b/compiler/circle2circle/requires.cmake
@@ -1,4 +1,3 @@
-require("foder")
require("loco")
require("locop")
require("logo-core")
diff --git a/compiler/circle2circle/src/Circle2Circle.cpp b/compiler/circle2circle/src/Circle2Circle.cpp
index ae677a321..f5cf0d782 100644
--- a/compiler/circle2circle/src/Circle2Circle.cpp
+++ b/compiler/circle2circle/src/Circle2Circle.cpp
@@ -14,9 +14,7 @@
* limitations under the License.
*/
-#include <foder/FileLoader.h>
-
-#include <luci/Importer.h>
+#include <luci/ImporterEx.h>
#include <luci/CircleOptimizer.h>
#include <luci/Service/ChangeOutputs.h>
#include <luci/Service/Validate.h>
@@ -54,6 +52,11 @@ void csv_tokenize(const std::string &data, std::vector<std::string> &result)
result.push_back(token);
}
+void add_switch(arser::Arser &arser, const char *opt, const char *desc)
+{
+ arser.add_argument(opt).nargs(0).default_value(false).help(desc);
+}
+
int entry(int argc, char **argv)
{
// Simple argument parser (based on map)
@@ -64,368 +67,125 @@ int entry(int argc, char **argv)
arser::Arser arser("circle2circle provides circle model optimization and transformations");
- arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
-
- arser.add_argument("-V", "--verbose")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("output additional information to stdout or stderr");
-
- arser.add_argument("--O1").nargs(0).required(false).default_value(false).help(
- "Enable O1 optimize options");
-
- arser.add_argument("--fold_add_v2")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fold AddV2 operators with constant inputs");
-
- arser.add_argument("--fold_cast")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fold Cast operators with constant input");
-
- arser.add_argument("--fold_dequantize")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fold dequantize op");
-
- arser.add_argument("--fold_dwconv")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fold Depthwise Convolution operator with constant inputs");
-
- arser.add_argument("--fold_gather")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fold Gather operator");
-
- arser.add_argument("--fold_sparse_to_dense")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fold SparseToDense operator");
-
- arser.add_argument("--forward_reshape_to_unaryop")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will move Reshape after UnaryOp for centain condition");
-
- arser.add_argument("--fuse_activation_function")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse Activation function to a preceding operator");
-
- arser.add_argument("--fuse_add_with_fully_connected")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse Add operator to FullyConnected operator");
-
- arser.add_argument("--fuse_add_with_tconv")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse Add operator to Transposed Convolution operator");
-
- arser.add_argument("--fuse_batchnorm_with_conv")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse BatchNorm operators to Convolution operator");
-
- arser.add_argument("--fuse_batchnorm_with_dwconv")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse BatchNorm operators to Depthwise Convolution operator");
-
- arser.add_argument("--fuse_batchnorm_with_tconv")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse BatchNorm operators to Transposed Convolution operator");
-
- arser.add_argument("--fuse_bcq")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse operators and apply Binary Coded Quantization");
-
- arser.add_argument("--fuse_instnorm")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse operators to InstanceNorm operator");
-
- arser.add_argument("--fuse_mean_with_mean")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse two Mean operations when they follow one by one."
- "This will fold them into one operation and merge reduction indices.");
-
- arser.add_argument("--fuse_transpose_with_mean")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse Mean operation with a preceding Transpose under certain conditions.");
-
- arser.add_argument("--make_batchnorm_gamma_positive")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will make negative gamma of BatchNorm into a small positive value (1e-10). Note "
- "that this pass can change the execution result of the model. So, use it only when the "
- "impact is known to be acceptable.");
-
- arser.add_argument("--fuse_preactivation_batchnorm")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse BatchNorm operators of pre-activations to Convolution operator");
-
- arser.add_argument("--remove_fakequant")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will remove FakeQuant operators");
-
- arser.add_argument("--remove_quantdequant")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will remove Quantize-Dequantize sequence");
-
- arser.add_argument("--remove_redundant_quantize")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will remove redundant Quantize operators");
-
- arser.add_argument("--remove_redundant_reshape")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse or remove subsequent Reshape operators");
-
- arser.add_argument("--remove_redundant_transpose")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will fuse or remove subsequent Transpose operators");
-
- arser.add_argument("--remove_unnecessary_reshape")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will remove unnecessary reshape operators");
-
- arser.add_argument("--remove_unnecessary_slice")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will remove unnecessary slice operators");
-
- arser.add_argument("--remove_unnecessary_strided_slice")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will remove unnecessary strided slice operators");
-
- arser.add_argument("--remove_unnecessary_split")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will remove unnecessary split operators");
-
- arser.add_argument("--replace_cw_mul_add_with_depthwise_conv")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will replace channel-wise mul/add with DepthwiseConv2D operator");
-
- arser.add_argument("--replace_sub_with_add")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will replace sub with add operator");
-
- arser.add_argument("--resolve_customop_add")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert Custom(Add) to Add operator");
-
- arser.add_argument("--resolve_customop_batchmatmul")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert Custom(BatchMatmul) to BatchMatmul operator");
-
- arser.add_argument("--resolve_customop_matmul")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert Custom(Matmul) to Matmul operator");
-
- arser.add_argument("--resolve_customop_max_pool_with_argmax")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert Custom(MaxPoolWithArgmax) to equivalent set of operators");
-
- arser.add_argument("--shuffle_weight_to_16x1float32")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert weight format of FullyConnected to SHUFFLED16x1FLOAT32. Note that "
- "it only converts weights whose row is a multiple of 16");
-
- arser.add_argument("--substitute_pack_to_reshape")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert single input Pack to Reshape");
-
- arser.add_argument("--substitute_padv2_to_pad")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert certain condition PadV2 to Pad");
-
- arser.add_argument("--substitute_splitv_to_split")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert certain condition SplitV to Split operator");
-
- arser.add_argument("--substitute_squeeze_to_reshape")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert certain condition Squeeze to Reshape");
-
- arser.add_argument("--substitute_strided_slice_to_reshape")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert certain condition Strided_Slice to Reshape");
-
- arser.add_argument("--substitute_transpose_to_reshape")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will convert single input Transpose to Reshape");
-
- arser.add_argument("--expand_broadcast_const")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will expand broadcastable constant inputs");
-
- arser.add_argument("--convert_nchw_to_nhwc")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Experimental: This will convert NCHW operators to NHWC under the assumption that "
- "input model is NCHW.");
-
- arser.add_argument("--nchw_to_nhwc_input_shape")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Convert the input shape of the model (argument for --convert_nchw_to_nhwc).");
-
- arser.add_argument("--nchw_to_nhwc_output_shape")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Convert the output shape of the model (argument for --convert_nchw_to_nhwc).");
-
- arser.add_argument("--transform_min_max_to_relu6")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Transform Minimum(6)-Maximum(0) pattern to Relu6 operator");
-
- arser.add_argument("--transform_min_relu_to_relu6")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Transform Minimum(6)-Relu pattern to Relu6 operator");
-
- arser.add_argument("--mute_warnings")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will turn off warning messages");
-
- arser.add_argument("--disable_validation")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will turn off operator validations. May help input model investigation.");
-
- arser.add_argument("--generate_profile_data")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("This will turn on profiling data generation.");
+ arser::Helper::add_version(arser, print_version);
+ arser::Helper::add_verbose(arser);
+
+ add_switch(arser, "--fold_add_v2", "This will fold AddV2 operators with constant inputs");
+ add_switch(arser, "--fold_cast", "This will fold Cast operators with constant input");
+ add_switch(arser, "--fold_densify",
+ "This will fold Densify operators with sparse constant input");
+ add_switch(arser, "--fold_dequantize", "This will fold dequantize op");
+ add_switch(arser, "--fold_dwconv",
+ "This will fold Depthwise Convolution operator with constant inputs");
+ add_switch(arser, "--fold_gather", "This will fold Gather operator");
+ add_switch(arser, "--fold_sparse_to_dense", "This will fold SparseToDense operator");
+ add_switch(arser, "--forward_reshape_to_unaryop",
+ "This will move Reshape after UnaryOp for centain condition");
+ add_switch(arser, "--fuse_activation_function",
+ "This will fuse Activation function to a preceding operator");
+ add_switch(arser, "--fuse_add_with_fully_connected",
+ "This will fuse Add operator to FullyConnected operator");
+ add_switch(arser, "--fuse_add_with_tconv",
+ "This will fuse Add operator to Transposed Convolution operator");
+ add_switch(arser, "--fuse_batchnorm_with_conv",
+ "This will fuse BatchNorm operators to Convolution operator");
+ add_switch(arser, "--fuse_batchnorm_with_dwconv",
+ "This will fuse BatchNorm operators to Depthwise Convolution operator");
+ add_switch(arser, "--fuse_batchnorm_with_tconv",
+ "This will fuse BatchNorm operators to Transposed Convolution operator");
+ add_switch(arser, "--fuse_bcq", "This will fuse operators and apply Binary Coded Quantization");
+ add_switch(arser, "--fuse_instnorm", "This will fuse operators to InstanceNorm operator");
+ add_switch(arser, "--fuse_mean_with_mean",
+ "This will fuse two Mean operations when they follow one by one. This will fold them "
+ "into one operation and merge reduction indices.");
+ add_switch(arser, "--fuse_transpose_with_mean",
+ "This will fuse Mean operation with a preceding Transpose under certain conditions.");
+ add_switch(arser, "--make_batchnorm_gamma_positive",
+ "This will make negative gamma of BatchNorm into a small positive value (1e-10). "
+ "Note that this pass can change the execution result of the model. So, use it only "
+ "when the impact is known to be acceptable.");
+ add_switch(arser, "--fuse_preactivation_batchnorm",
+ "This will fuse BatchNorm operators of pre-activations to Convolution operator");
+ add_switch(arser, "--remove_fakequant", "This will remove FakeQuant operators");
+ add_switch(arser, "--remove_quantdequant", "This will remove Quantize-Dequantize sequence");
+ add_switch(arser, "--remove_redundant_quantize", "This will remove redundant Quantize operators");
+ add_switch(arser, "--remove_redundant_reshape",
+ "This will fuse or remove subsequent Reshape operators");
+ add_switch(arser, "--remove_redundant_transpose",
+ "This will fuse or remove subsequent Transpose operators");
+ add_switch(arser, "--remove_unnecessary_reshape",
+ "This will remove unnecessary reshape operators");
+ add_switch(arser, "--remove_unnecessary_slice", "This will remove unnecessary slice operators");
+ add_switch(arser, "--remove_unnecessary_strided_slice",
+ "This will remove unnecessary strided slice operators");
+ add_switch(arser, "--remove_unnecessary_split", "This will remove unnecessary split operators");
+ add_switch(arser, "--replace_cw_mul_add_with_depthwise_conv",
+ "This will replace channel-wise mul/add with DepthwiseConv2D operator");
+ add_switch(arser, "--replace_sub_with_add", "This will replace sub with add operator");
+ add_switch(arser, "--resolve_customop_add", "This will convert Custom(Add) to Add operator");
+ add_switch(arser, "--resolve_customop_batchmatmul",
+ "This will convert Custom(BatchMatmul) to BatchMatmul operator");
+ add_switch(arser, "--resolve_customop_matmul",
+ "This will convert Custom(Matmul) to Matmul operator");
+ add_switch(arser, "--resolve_customop_max_pool_with_argmax",
+ "This will convert Custom(MaxPoolWithArgmax) to equivalent set of operators");
+ add_switch(arser, "--resolve_customop_splitv",
+ "This will convert Custom(SplitV) to SplitV operator");
+ add_switch(arser, "--shuffle_weight_to_16x1float32",
+ "This will convert weight format of FullyConnected to SHUFFLED16x1FLOAT32. Note that "
+ "it only converts weights whose row is a multiple of 16");
+ add_switch(arser, "--replace_non_const_fc_with_batch_matmul",
+ "Replace FullyConnected with BatchMatMul when its weight is non-constant");
+ add_switch(arser, "--substitute_pack_to_reshape",
+ "This will convert single input Pack to Reshape");
+ add_switch(arser, "--substitute_padv2_to_pad",
+ "This will convert certain condition PadV2 to Pad");
+ add_switch(arser, "--substitute_splitv_to_split",
+ "This will convert certain condition SplitV to Split operator");
+ add_switch(arser, "--substitute_squeeze_to_reshape",
+ "This will convert certain condition Squeeze to Reshape");
+ add_switch(arser, "--substitute_strided_slice_to_reshape",
+ "This will convert certain condition Strided_Slice to Reshape");
+ add_switch(arser, "--substitute_transpose_to_reshape",
+ "This will convert single input Transpose to Reshape");
+ add_switch(arser, "--expand_broadcast_const", "This will expand broadcastable constant inputs");
+ add_switch(arser, "--convert_nchw_to_nhwc",
+ "Experimental: This will convert NCHW operators to NHWC under the assumption that "
+ "input model is NCHW.");
+ add_switch(arser, "--nchw_to_nhwc_input_shape",
+ "Convert the input shape of the model (argument for --convert_nchw_to_nhwc).");
+ add_switch(arser, "--nchw_to_nhwc_output_shape",
+ "Convert the output shape of the model (argument for --convert_nchw_to_nhwc).");
+ add_switch(arser, "--transform_min_max_to_relu6",
+ "Transform Minimum(6)-Maximum(0) pattern to Relu6 operator");
+ add_switch(arser, "--transform_min_relu_to_relu6",
+ "Transform Minimum(6)-Relu pattern to Relu6 operator");
+ add_switch(arser, "--mute_warnings", "This will turn off warning messages");
+ add_switch(arser, "--disable_validation",
+ "This will turn off operator validations. May help input model investigation.");
+ add_switch(arser, "--generate_profile_data", "This will turn on profiling data generation.");
arser.add_argument("--change_outputs")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
.help("Experimental: Change first subgraph output nodes to CSV names");
- arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
- arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
+ arser.add_argument("input").help("Input circle model");
+ arser.add_argument("output").help("Output circle model");
// sparsification argument
- arser.add_argument("--sparsify_tensor")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .help("Tensor name that you want to sparsify");
+ arser.add_argument("--sparsify_tensor").help("Tensor name that you want to sparsify");
arser.add_argument("--sparsify_traversal_order")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
.default_value("0,1,2,3")
.help("Traversal order of dimensions. Default value: 0,1,2,3");
arser.add_argument("--sparsify_format")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
.default_value("d,s")
.help("Format of each dimension. 'd' stands for dense, 's' stands for sparse(CSR). Default "
"value: d,s");
- arser.add_argument("--sparsify_block_size")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
- .help("Size of each block dimension");
+ arser.add_argument("--sparsify_block_size").help("Size of each block dimension");
arser.add_argument("--sparsify_block_map")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
.default_value("0,1")
.help("Map from block dimension to the original tensor dimension. Default value: 0,1");
@@ -446,20 +206,12 @@ int entry(int argc, char **argv)
// If REPLACE is zero, it does not overwrite an existing value.
setenv("LUCI_LOG", "100", 0);
}
- if (arser.get<bool>("--O1"))
- {
- options->enable(Algorithms::FuseBCQ);
- options->enable(Algorithms::FuseInstanceNorm);
- options->enable(Algorithms::ResolveCustomOpAdd);
- options->enable(Algorithms::ResolveCustomOpBatchMatMul);
- options->enable(Algorithms::ResolveCustomOpMatMul);
- options->enable(Algorithms::RemoveRedundantTranspose);
- options->enable(Algorithms::SubstitutePackToReshape);
- }
if (arser.get<bool>("--fold_add_v2"))
options->enable(Algorithms::FoldAddV2);
if (arser.get<bool>("--fold_cast"))
options->enable(Algorithms::FoldCast);
+ if (arser.get<bool>("--fold_densify"))
+ options->enable(Algorithms::FoldDensify);
if (arser.get<bool>("--fold_dequantize"))
options->enable(Algorithms::FoldDequantize);
if (arser.get<bool>("--fold_dwconv"))
@@ -524,8 +276,12 @@ int entry(int argc, char **argv)
options->enable(Algorithms::ResolveCustomOpMatMul);
if (arser.get<bool>("--resolve_customop_max_pool_with_argmax"))
options->enable(Algorithms::ResolveCustomOpMaxPoolWithArgmax);
+ if (arser.get<bool>("--resolve_customop_splitv"))
+ options->enable(Algorithms::ResolveCustomOpSplitV);
if (arser.get<bool>("--shuffle_weight_to_16x1float32"))
options->enable(Algorithms::ShuffleWeightTo16x1Float32);
+ if (arser.get<bool>("--replace_non_const_fc_with_batch_matmul"))
+ options->enable(Algorithms::ReplaceNonConstFCWithBatchMatMul);
if (arser.get<bool>("--substitute_pack_to_reshape"))
options->enable(Algorithms::SubstitutePackToReshape);
if (arser.get<bool>("--substitute_padv2_to_pad"))
@@ -595,37 +351,11 @@ int entry(int argc, char **argv)
csv_tokenize(csv_nodes, new_outputs);
}
- // Load model from the file
- foder::FileLoader file_loader{input_path};
- std::vector<char> model_data;
-
- try
- {
- model_data = file_loader.load();
- }
- catch (const std::runtime_error &err)
- {
- std::cerr << err.what() << std::endl;
- return EXIT_FAILURE;
- }
-
- flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
- if (!circle::VerifyModelBuffer(verifier))
- {
- std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
- return EXIT_FAILURE;
- }
-
- const circle::Model *circle_model = circle::GetModel(model_data.data());
- if (circle_model == nullptr)
- {
- std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
- return EXIT_FAILURE;
- }
-
// Import from input Circle file
- luci::Importer importer;
- auto module = importer.importModule(circle_model);
+ luci::ImporterEx importerex;
+ auto module = importerex.importVerifyModule(input_path);
+ if (module.get() == nullptr)
+ return EXIT_FAILURE;
if (change_outputs)
{
diff --git a/compiler/circlechef/tools/file/Driver.cpp b/compiler/circlechef/tools/file/Driver.cpp
index 76d0f3f7f..9c4256b40 100644
--- a/compiler/circlechef/tools/file/Driver.cpp
+++ b/compiler/circlechef/tools/file/Driver.cpp
@@ -28,10 +28,8 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("recipe")
- .type(arser::DataType::STR)
- .help("Source recipe file path to convert");
- arser.add_argument("circle").type(arser::DataType::STR).help("Target circle file path");
+ arser.add_argument("recipe").help("Source recipe file path to convert");
+ arser.add_argument("circle").help("Target circle file path");
try
{
diff --git a/compiler/circlechef/tools/reverse/Driver.cpp b/compiler/circlechef/tools/reverse/Driver.cpp
index 639e0af6f..c8ef07c6f 100644
--- a/compiler/circlechef/tools/reverse/Driver.cpp
+++ b/compiler/circlechef/tools/reverse/Driver.cpp
@@ -25,10 +25,8 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("circle")
- .type(arser::DataType::STR)
- .help("Source circle file path to convert");
- arser.add_argument("recipe").type(arser::DataType::STR).help("Target recipe file path");
+ arser.add_argument("circle").help("Source circle file path to convert");
+ arser.add_argument("recipe").help("Target recipe file path");
try
{
diff --git a/compiler/circledump/CMakeLists.txt b/compiler/circledump/CMakeLists.txt
index b65c06677..7485ff8e7 100644
--- a/compiler/circledump/CMakeLists.txt
+++ b/compiler/circledump/CMakeLists.txt
@@ -10,6 +10,7 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
add_executable(circledump ${DRIVER} ${SOURCES})
target_include_directories(circledump PRIVATE include)
target_link_libraries(circledump arser)
+target_link_libraries(circledump foder)
target_link_libraries(circledump mio_circle04)
target_link_libraries(circledump mio_circle04_helper)
target_link_libraries(circledump safemain)
diff --git a/compiler/circledump/driver/Driver.cpp b/compiler/circledump/driver/Driver.cpp
index 657f24fe0..5b0871a91 100644
--- a/compiler/circledump/driver/Driver.cpp
+++ b/compiler/circledump/driver/Driver.cpp
@@ -15,7 +15,7 @@
*/
#include <arser/arser.h>
-#include <circleread/Model.h>
+#include <foder/FileLoader.h>
#include <circledump/Dump.h>
#include <iostream>
@@ -23,7 +23,7 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("circle").type(arser::DataType::STR).help("Circle file path to dump");
+ arser.add_argument("circle").help("Circle file path to dump");
try
{
@@ -38,14 +38,10 @@ int entry(int argc, char **argv)
std::string circle_path = arser.get<std::string>("circle");
// Load Circle model from a circle file
- std::unique_ptr<circleread::Model> model = circleread::load_circle(circle_path);
- if (model == nullptr)
- {
- std::cerr << "ERROR: Failed to load circle '" << circle_path << "'" << std::endl;
- return 255;
- }
-
- const circle::Model *circlemodel = model->model();
+ foder::FileLoader fileLoader{circle_path};
+ std::vector<char> modelData = fileLoader.load();
+ const circle::Model *circlemodel = circle::GetModel(modelData.data());
+ // const circle::Model *circlemodel = model->model();
if (circlemodel == nullptr)
{
std::cerr << "ERROR: Failed to load circle '" << circle_path << "'" << std::endl;
diff --git a/compiler/circledump/include/circleread/Model.h b/compiler/circledump/include/circleread/Model.h
deleted file mode 100644
index 234db8b4c..000000000
--- a/compiler/circledump/include/circleread/Model.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CIRCLEREAD_MODEL_H__
-#define __CIRCLEREAD_MODEL_H__
-
-#include <mio/circle/schema_generated.h>
-
-#include <memory>
-
-namespace circleread
-{
-
-struct Model
-{
- virtual ~Model() = default;
-
- virtual const ::circle::Model *model(void) const = 0;
-};
-
-/**
- * @brief Load Circle model (as a raw Model) from a given path
- *
- * @note May return a nullptr
- */
-std::unique_ptr<Model> load_circle(const std::string &path);
-
-} // namespace circleread
-
-#endif // __CIRCLEREAD_MODEL_H__
diff --git a/compiler/circledump/requires.cmake b/compiler/circledump/requires.cmake
index 362d67cf4..183dfe227 100644
--- a/compiler/circledump/requires.cmake
+++ b/compiler/circledump/requires.cmake
@@ -1,3 +1,4 @@
require("arser")
+require("foder")
require("mio-circle04")
require("safemain")
diff --git a/compiler/circledump/src/Dump.cpp b/compiler/circledump/src/Dump.cpp
index 0b256dda8..69427a20e 100644
--- a/compiler/circledump/src/Dump.cpp
+++ b/compiler/circledump/src/Dump.cpp
@@ -16,8 +16,8 @@
#include <circledump/Dump.h>
#include <mio_circle/Helper.h>
+#include <mio_circle/Reader.h>
-#include "Read.h"
#include "OpPrinter.h"
#include "MetadataPrinter.h"
@@ -122,7 +122,7 @@ std::ostream &operator<<(std::ostream &os, const flatbuffers::Vector<T> *fbvect)
return os;
}
-void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
+void dump_sub_graph(std::ostream &os, mio::circle::Reader &reader)
{
auto tensors = reader.tensors();
auto operators = reader.operators();
@@ -150,14 +150,14 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
std::vector<int32_t> dims = {-1};
if (tensor->shape())
- dims = circleread::as_index_vector(tensor->shape());
+ dims = mio::circle::as_index_vector(tensor->shape());
os << "T(" << reader.subgraph_index() << ":" << i << ") " << mio::circle::tensor_type(tensor)
<< " ";
os << "(" << dims << ") ";
if (tensor->shape_signature())
{
- std::vector<int32_t> dims_sig = circleread::as_index_vector(tensor->shape_signature());
+ std::vector<int32_t> dims_sig = mio::circle::as_index_vector(tensor->shape_signature());
os << "(" << dims_sig << ") ";
}
os << "B(" << tensor->buffer() << ") ";
@@ -299,8 +299,8 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
const auto op = operators->Get(i);
circle::BuiltinOperator builtincode = reader.builtin_code(op);
- const std::vector<int32_t> &inputs = circleread::as_index_vector(op->inputs());
- const std::vector<int32_t> &outputs = circleread::as_index_vector(op->outputs());
+ const std::vector<int32_t> &inputs = mio::circle::as_index_vector(op->inputs());
+ const std::vector<int32_t> &outputs = mio::circle::as_index_vector(op->outputs());
auto op_name = reader.opcode_name(op);
os << "O(" << reader.subgraph_index() << ":" << i << ") " << op_name << " ";
@@ -356,7 +356,7 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
void dump_model(std::ostream &os, const circle::Model *model)
{
- circleread::Reader reader(model);
+ mio::circle::Reader reader(model);
uint32_t num_subgraph = reader.num_subgraph();
diff --git a/compiler/circledump/src/Load.cpp b/compiler/circledump/src/Load.cpp
deleted file mode 100644
index 67e7fa5a6..000000000
--- a/compiler/circledump/src/Load.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <circleread/Model.h>
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-
-namespace
-{
-
-class MemoryMappedModel final : public circleread::Model
-{
-public:
- /**
- * @require fd and data SHOULD be valid
- */
- explicit MemoryMappedModel(int fd, void *data, size_t size) : _fd{fd}, _data{data}, _size{size}
- {
- // DO NOTHING
- }
-
-public:
- ~MemoryMappedModel()
- {
- munmap(_data, _size);
- close(_fd);
- }
-
-public:
- MemoryMappedModel(const MemoryMappedModel &) = delete;
- MemoryMappedModel(MemoryMappedModel &&) = delete;
-
-public:
- const ::circle::Model *model(void) const override { return ::circle::GetModel(_data); }
-
-private:
- int _fd = -1;
- void *_data = nullptr;
- size_t _size = 0;
-};
-
-class FileDescriptor final
-{
-public:
- FileDescriptor(int value) : _value{value}
- {
- // DO NOTHING
- }
-
-public:
- // NOTE Copy is not allowed
- FileDescriptor(const FileDescriptor &) = delete;
-
-public:
- // NOTE Move is allowed
- FileDescriptor(FileDescriptor &&fd) { _value = fd.release(); }
-
-public:
- ~FileDescriptor()
- {
- if (_value != -1)
- {
- // Close on destructor
- close(_value);
- }
- }
-
-public:
- int value(void) const { return _value; }
-
-public:
- int release(void)
- {
- auto res = _value;
- _value = -1;
- return res;
- }
-
-private:
- int _value = -1;
-};
-
-} // namespace
-
-namespace circleread
-{
-
-std::unique_ptr<Model> load_circle(const std::string &path)
-{
- FileDescriptor fd = open(path.c_str(), O_RDONLY);
-
- if (fd.value() == -1)
- {
- // Return nullptr on open failure
- return nullptr;
- }
-
- struct stat st;
- if (fstat(fd.value(), &st) == -1)
- {
- // Return nullptr on fstat failure
- return nullptr;
- }
-
- auto size = st.st_size;
- auto data = mmap(nullptr, size, PROT_READ, MAP_SHARED, fd.value(), 0);
-
- if (data == MAP_FAILED)
- {
- // Return nullptr on mmap failure
- return nullptr;
- }
-
- return std::unique_ptr<circleread::Model>{new MemoryMappedModel(fd.release(), data, size)};
-}
-
-} // namespace circleread
diff --git a/compiler/circledump/src/OpPrinter.cpp b/compiler/circledump/src/OpPrinter.cpp
index 02e5c26b5..817371dcf 100644
--- a/compiler/circledump/src/OpPrinter.cpp
+++ b/compiler/circledump/src/OpPrinter.cpp
@@ -15,7 +15,8 @@
*/
#include "OpPrinter.h"
-#include "Read.h"
+
+#include <mio_circle/Helper.h>
#include <memory>
@@ -233,7 +234,7 @@ public:
{
if (auto *reshape_params = op->builtin_options_as_ReshapeOptions())
{
- auto new_shape = circleread::as_index_vector(reshape_params->new_shape());
+ auto new_shape = mio::circle::as_index_vector(reshape_params->new_shape());
os << " ";
os << "NewShape(" << new_shape << ")";
os << std::endl;
@@ -802,6 +803,7 @@ OpPrinterRegistry::OpPrinterRegistry()
// There is no Option for CEIL
_op_map[circle::BuiltinOperator_CONCATENATION] = make_unique<ConcatenationPrinter>();
_op_map[circle::BuiltinOperator_CONV_2D] = make_unique<Conv2DPrinter>();
+ // There is no Option for DENSIFY
_op_map[circle::BuiltinOperator_DEPTH_TO_SPACE] = make_unique<DepthToSpacePrinter>();
_op_map[circle::BuiltinOperator_DEPTHWISE_CONV_2D] = make_unique<DepthwiseConv2DPrinter>();
// There is no Option for DEQUANTIZE
diff --git a/compiler/circledump/src/Read.cpp b/compiler/circledump/src/Read.cpp
deleted file mode 100644
index 3a7e98cde..000000000
--- a/compiler/circledump/src/Read.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Read.h"
-
-#include <mio_circle/Helper.h>
-
-#include <sstream>
-#include <string>
-
-namespace circleread
-{
-
-Reader::Reader(const circle::Model *model)
-{
- _version = model->version();
- _subgraphs = model->subgraphs();
- _buffers = model->buffers();
- _metadata = model->metadata();
- _signature_defs = model->signature_defs();
-
- auto opcodes = model->operator_codes();
- for (const ::circle::OperatorCode *opcode : *opcodes)
- {
- _op_codes.push_back(opcode);
- }
-}
-
-size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data)
-{
- *buff_data = nullptr;
-
- if (buf_idx == 0)
- return 0;
-
- if (auto *buffer = (*_buffers)[buf_idx])
- {
- if (auto *array = buffer->data())
- {
- if (size_t size = array->size())
- {
- *buff_data = reinterpret_cast<const uint8_t *>(array->data());
- return size;
- }
- }
- }
-
- return 0;
-}
-
-circle::BuiltinOperator Reader::builtin_code(const circle::Operator *op) const
-{
- uint32_t index = op->opcode_index();
- assert(index < _op_codes.size());
- const circle::OperatorCode *opcode = _op_codes.at(index);
-
- return opcode->builtin_code();
-}
-
-std::string Reader::opcode_name(const circle::Operator *op) const
-{
- uint32_t index = op->opcode_index();
- assert(index < _op_codes.size());
- const circle::OperatorCode *opcode = _op_codes.at(index);
-
- if (!mio::circle::is_valid(opcode))
- {
- std::ostringstream oss;
- oss << "(invalid: " << index << ")";
- return oss.str();
- }
-
- return mio::circle::opcode_name(opcode);
-}
-
-bool Reader::select_subgraph(uint32_t sgindex)
-{
- _subgraph_index = sgindex;
- _tensors = nullptr;
- _operators = nullptr;
-
- _inputs.clear();
- _outputs.clear();
-
- if (_subgraphs->Length() <= sgindex)
- {
- assert(false);
- return false;
- }
-
- const circle::SubGraph *subgraph = (*_subgraphs)[sgindex];
-
- auto name = subgraph->name();
- _subgraph_name = name ? name->c_str() : "(noname)";
-
- _tensors = subgraph->tensors();
- _operators = subgraph->operators();
- _data_format = subgraph->data_format();
-
- _inputs = as_index_vector(subgraph->inputs());
- _outputs = as_index_vector(subgraph->outputs());
-
- return true;
-}
-
-} // namespace circleread
diff --git a/compiler/circledump/src/Read.h b/compiler/circledump/src/Read.h
deleted file mode 100644
index 05b0e5072..000000000
--- a/compiler/circledump/src/Read.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CIRCLEREAD_READ_H__
-#define __CIRCLEREAD_READ_H__
-
-#include <mio/circle/schema_generated.h>
-
-#include <map>
-#include <string>
-#include <vector>
-
-namespace circleread
-{
-
-template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array)
-{
- if (flat_array == nullptr)
- {
- throw std::runtime_error("flat array is nullptr");
- }
-
- std::vector<T> ret(flat_array->Length());
- for (uint32_t i = 0; i < flat_array->Length(); i++)
- {
- ret[i] = flat_array->Get(i);
- }
- return ret;
-}
-
-/**
- * @brief Loads Circle file and provides helpers to access attributes
- */
-class Reader
-{
-private:
- using CircleSubGraphs_t = flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>;
- using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>;
- using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>;
- using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>;
- using CircleMetadata_t = flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>;
- using CircleSignatureDef_t = flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>>;
-
-public:
- Reader(const circle::Model *model);
-
- Reader() = delete;
-
-public:
- uint32_t version() const { return _version; }
-
- const std::vector<const circle::OperatorCode *> &opcodes() { return _op_codes; }
- const CircleBuffers_t *buffers() { return _buffers; }
- const CircleTensors_t *tensors() { return _tensors; }
- const CircleOperators_t *operators() { return _operators; }
- const std::vector<int32_t> &inputs() const { return _inputs; }
- const std::vector<int32_t> &outputs() const { return _outputs; }
- const circle::DataFormat &data_format() const { return _data_format; }
- const CircleMetadata_t *metadata() const { return _metadata; }
- const CircleSignatureDef_t *signature_defs() const { return _signature_defs; }
-
- uint32_t num_subgraph() const { return _subgraphs->Length(); }
-
- size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data);
- circle::BuiltinOperator builtin_code(const circle::Operator *op) const;
- std::string opcode_name(const circle::Operator *op) const;
-
-public:
- bool select_subgraph(uint32_t subgraph);
- const std::string &subgraph_name(void) const { return _subgraph_name; }
- uint32_t subgraph_index(void) const { return _subgraph_index; }
-
-private:
- uint32_t _version;
-
- const CircleSubGraphs_t *_subgraphs{nullptr};
- const CircleBuffers_t *_buffers{nullptr};
- const CircleTensors_t *_tensors{nullptr};
- const CircleOperators_t *_operators{nullptr};
- const CircleMetadata_t *_metadata{nullptr};
- const CircleSignatureDef_t *_signature_defs{nullptr};
-
- uint32_t _subgraph_index = 0;
- std::string _subgraph_name;
- std::vector<const circle::OperatorCode *> _op_codes;
- std::vector<int32_t> _inputs;
- std::vector<int32_t> _outputs;
- circle::DataFormat _data_format = circle::DataFormat::DataFormat_CHANNELS_FIRST;
-};
-
-} // namespace circleread
-
-#endif // __CIRCLEREAD_READ_H__
diff --git a/compiler/cli/CMakeLists.txt b/compiler/cli/CMakeLists.txt
index 0fb99ddba..4ab0ea218 100644
--- a/compiler/cli/CMakeLists.txt
+++ b/compiler/cli/CMakeLists.txt
@@ -10,5 +10,5 @@ endif(NOT ENABLE_TEST)
nnas_find_package(GTest QUIET)
-GTest_AddTEst(cli_test ${TESTS})
+GTest_AddTest(cli_test ${TESTS})
target_link_libraries(cli_test cli)
diff --git a/compiler/coco/core/src/IR/Module.cpp b/compiler/coco/core/src/IR/Module.cpp
index 420cf6f0c..0db78941c 100644
--- a/compiler/coco/core/src/IR/Module.cpp
+++ b/compiler/coco/core/src/IR/Module.cpp
@@ -144,7 +144,7 @@ std::unique_ptr<Module> Module::create(void)
m->_input = make_unique<coco::InputList>();
m->_output = make_unique<coco::OutputList>();
- return std::move(m);
+ return m;
}
} // namespace coco
diff --git a/compiler/coco/generic/src/IR/Data.cpp b/compiler/coco/generic/src/IR/Data.cpp
index 5ab7069ee..361dcc243 100644
--- a/compiler/coco/generic/src/IR/Data.cpp
+++ b/compiler/coco/generic/src/IR/Data.cpp
@@ -209,8 +209,7 @@ std::unique_ptr<Data> Data::create(void)
data->_blob = std::move(blob);
data->_fp32 = std::move(fp32);
- // GCC 4.9 tries to copy data (while GCC 6.X doesn't)
- return std::move(data);
+ return data;
}
} // namespace coco
diff --git a/compiler/common-artifacts/CMakeLists.txt b/compiler/common-artifacts/CMakeLists.txt
index 404149c15..34a3a4d7d 100644
--- a/compiler/common-artifacts/CMakeLists.txt
+++ b/compiler/common-artifacts/CMakeLists.txt
@@ -12,14 +12,6 @@ if(${PYTHON_VERSION_MINOR} LESS 8)
return()
endif()
-# Create python virtual environment with tensorflow 2.6.0
-set(VIRTUALENV_OVERLAY_TF_2_6_0 "${NNCC_OVERLAY_DIR}/venv_2_6_0")
-
-add_custom_command(
- OUTPUT ${VIRTUALENV_OVERLAY_TF_2_6_0}
- COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_6_0}
-)
-
# Create python virtual environment with tensorflow 2.8.0
set(VIRTUALENV_OVERLAY_TF_2_8_0 "${NNCC_OVERLAY_DIR}/venv_2_8_0")
@@ -30,33 +22,36 @@ add_custom_command(
# Create requirements.txt and install required pip packages
set(REQUIREMENTS_FILE "requirements.txt")
-set(REQUIREMENTS_OVERLAY_PATH_TF_2_6_0 "${VIRTUALENV_OVERLAY_TF_2_6_0}/${REQUIREMENTS_FILE}")
set(REQUIREMENTS_OVERLAY_PATH_TF_2_8_0 "${VIRTUALENV_OVERLAY_TF_2_8_0}/${REQUIREMENTS_FILE}")
-add_custom_command(
- OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
- COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
- COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.6.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
- COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
- COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python3.8 -m pip --default-timeout=1000 install --upgrade pip setuptools
- COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python3.8 -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} --upgrade
- DEPENDS ${VIRTUALENV_OVERLAY_TF_2_6_0}
-)
+set(PYTHON_OVERLAY python3)
+if(PYTHON_EXECUTABLE MATCHES python3.8)
+ set(PYTHON_OVERLAY python3.8)
+endif()
+# NOTE when using behind proxy with self signed certificate, need to set '--trusted-host' options
+set(PIP_OPTION_TRUSTED_HOST )
+if(DEFINED ENV{ONE_PIP_OPTION_TRUST_HOST})
+ set(PIP_OPTION_TRUSTED_HOST --trusted-host pypi.python.org --trusted-host files.pythonhosted.org --trusted-host pypi.org)
+endif()
+
+# NOTE refer https://github.com/protocolbuffers/protobuf/issues/10051
+# TODO remove protobuf==3.20.1 when issue is resolved
add_custom_command(
OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.8.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
- COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/python3.8 -m pip --default-timeout=1000 install --upgrade pip setuptools
- COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/python3.8 -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} --upgrade
+ COMMAND ${CMAKE_COMMAND} -E echo "protobuf==3.20.1" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
+ COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/${PYTHON_OVERLAY} -m pip --default-timeout=1000
+ ${PIP_OPTION_TRUSTED_HOST} install --upgrade pip setuptools
+ COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/${PYTHON_OVERLAY} -m pip --default-timeout=1000
+ ${PIP_OPTION_TRUSTED_HOST} install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} --upgrade
DEPENDS ${VIRTUALENV_OVERLAY_TF_2_8_0}
)
add_custom_target(common_artifacts_python_deps ALL
- DEPENDS ${VIRTUALENV_OVERLAY_TF_2_6_0}
- ${VIRTUALENV_OVERLAY_TF_2_8_0}
- ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
+ DEPENDS ${VIRTUALENV_OVERLAY_TF_2_8_0}
${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
)
@@ -246,7 +241,13 @@ foreach(RECIPE IN ITEMS ${RECIPES})
if(NOT DEFINED NO_OPTIMIZE_${RECIPE})
# Generate optimized .circle
add_custom_command(OUTPUT ${OPT_CIRCLE_OUTPUT_PATH}
- COMMAND $<TARGET_FILE:circle2circle> --O1 ${CIRCLE_OUTPUT_PATH} ${OPT_CIRCLE_OUTPUT_PATH}
+ # NOTE --resolve_customop_add is just to added for old -O1, no particular meaning
+ # --fold_dequantize is added to fold Tensor(FLOAT16) + DEQUANTIZE (Net_Dequantize_Add)
+ # model. FLOAT16 in general is NOT supported but only Tensor(FLOAT16) + DEQUANTIZE
+ # sequence accepted as folded to Tensor(FLOAT32).
+ # TODO revise giving options from the list file
+ COMMAND $<TARGET_FILE:circle2circle> --resolve_customop_add --fold_dequantize --fold_densify
+ ${CIRCLE_OUTPUT_PATH} ${OPT_CIRCLE_OUTPUT_PATH}
DEPENDS $<TARGET_FILE:circle2circle> ${CIRCLE_OUTPUT_PATH}
COMMENT "Generate ${OPT_CIRCLE_FILE}"
)
diff --git a/compiler/common-artifacts/exclude.lst b/compiler/common-artifacts/exclude.lst
index 92b07fde8..2275a42d9 100644
--- a/compiler/common-artifacts/exclude.lst
+++ b/compiler/common-artifacts/exclude.lst
@@ -32,6 +32,7 @@ tcgenerate(BroadcastTo_000) # luci-interpreter doesn't support custom operator
tcgenerate(Ceil_000)
tcgenerate(Conv2D_003) # runtime doesn't support dilation
tcgenerate(Cos_000)
+tcgenerate(Densify_000) # luci-interpreter doesn't support
tcgenerate(DepthwiseConv2D_001) # runtime doesn't support dilation
tcgenerate(DepthwiseConv2D_003) # runtime doesn't support dilation
tcgenerate(DepthwiseConv2D_U8_001) # luci-interpreter doesn't support channel-wise quantization yet
@@ -67,6 +68,8 @@ tcgenerate(Neg_000)
tcgenerate(Net_BroadcastTo_AddV2_001) # luci-interpreter doesn't support custom operator
tcgenerate(Net_Conv_FakeQuant_000) # luci-interpreter doesn't support FakeQuant yet
tcgenerate(Net_Dangle_001)
+tcgenerate(Net_Densify_Add_000) # luci-interpreter doesn't support Densify yet
+tcgenerate(Net_Densify_Dequantize_Add_000) # luci-interpreter doesn't support Densify/Dequantize yet
tcgenerate(Net_Gather_SparseToDense_AddV2_000) # luci-interpreter doesn't support custom operator
tcgenerate(Net_ZeroDim_001) # luci-interpreter doesn't support zero dim
tcgenerate(OneHot_000)
diff --git a/compiler/common-artifacts/src/TestDataGenerator.cpp b/compiler/common-artifacts/src/TestDataGenerator.cpp
index 33cecbbe2..7481050c5 100644
--- a/compiler/common-artifacts/src/TestDataGenerator.cpp
+++ b/compiler/common-artifacts/src/TestDataGenerator.cpp
@@ -142,23 +142,15 @@ void fill_random_range(void *data, uint32_t size, loco::DataType dtype, int32_t
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("circle").type(arser::DataType::STR).help("Circle file you want to test");
- arser.add_argument("--input_data")
- .required(true)
- .nargs(1)
- .type(arser::DataType::STR)
- .help("Path to generate input data h5 file");
+ arser.add_argument("circle").help("Circle file you want to test");
+ arser.add_argument("--input_data").required(true).help("Path to generate input data h5 file");
arser.add_argument("--expected_data")
.required(true)
- .nargs(1)
- .type(arser::DataType::STR)
.help("Path to generate expected data h5 file");
arser.add_argument("--fixed_seed")
- .required(false)
.nargs(0)
.help("Put a fixed seed into the random number generator");
arser.add_argument("--input_range")
- .required(false)
.nargs(3)
.type(arser::DataType::STR_VEC)
.help("Set random number range [min max] for the input as 'name min max'");
diff --git a/compiler/crew/CMakeLists.txt b/compiler/crew/CMakeLists.txt
index 1824d86ab..45cda7562 100644
--- a/compiler/crew/CMakeLists.txt
+++ b/compiler/crew/CMakeLists.txt
@@ -12,9 +12,12 @@ if(NOT ENABLE_TEST)
return()
endif(NOT ENABLE_TEST)
+configure_file("src/test_read_semicolon.ini" "test_read_semicolon.ini" COPYONLY)
+
nnas_find_package(GTest REQUIRED)
GTest_AddTest(crew_test ${TESTS})
target_include_directories(crew_test PRIVATE src)
target_link_libraries(crew_test nncc_common)
target_link_libraries(crew_test crew)
+target_link_libraries(crew_test foder)
diff --git a/compiler/crew/src/PConfigIni.cpp b/compiler/crew/src/PConfigIni.cpp
index f0e3e8e01..5177843bf 100644
--- a/compiler/crew/src/PConfigIni.cpp
+++ b/compiler/crew/src/PConfigIni.cpp
@@ -26,10 +26,36 @@
#include <sstream>
#include <stdexcept>
#include <string>
+#include <vector>
namespace crew
{
+namespace
+{
+
+std::string filter_escape(const std::string &source)
+{
+ std::string key = source;
+
+ // if key is surrounded with quotation
+ // TODO for quotation
+
+ // if key has '\\' + ';', remove '\\'
+ auto pos = key.find("\\;");
+ while (pos != std::string::npos)
+ {
+ auto k1 = key.substr(0, pos);
+ auto k2 = key.substr(pos + 1);
+ key = k1 + k2;
+ pos = key.find("\\;");
+ }
+
+ return key;
+}
+
+} // namespace
+
Sections read_ini(const char *data, size_t length)
{
assert(data != nullptr);
@@ -84,6 +110,7 @@ Sections read_ini(const char *data, size_t length)
{
auto key = string_line.substr(0, pos);
auto val = string_line.substr(pos + 1);
+ key = filter_escape(key);
section.items.emplace(key, val);
}
}
@@ -107,11 +134,53 @@ Sections read_ini(const std::string &path)
return read_ini(ini_data.data(), ini_data.size());
}
+namespace
+{
+
+void replace(std::string &source, const std::string &token, const std::string &replace)
+{
+ size_t pos = 0;
+ while ((pos = source.find(token, pos)) != std::string::npos)
+ {
+ source.replace(pos, token.length(), replace);
+ pos += replace.length(); // Handles the case where 'replace' is a substring of 'token'
+ }
+}
+
+Sections insert_escape(const Sections &inputs)
+{
+ Sections sections;
+
+ // for all section in sections;
+ // if key has ';' then replace with '\;'
+ for (auto &input : inputs)
+ {
+ Section section;
+ section.name = input.name;
+
+ for (auto &item : input.items)
+ {
+ auto key = item.first;
+ auto value = item.second;
+
+ replace(key, ";", "\\;");
+ section.items[key] = value;
+ }
+ sections.push_back(section);
+ }
+
+ return sections;
+}
+
+} // namespace
+
void write_ini(std::ostream &os, const Sections &sections)
{
std::stringstream ss;
- ss << sections;
+ auto processed = insert_escape(sections);
+
+ ss << processed;
std::string strss = ss.str();
diff --git a/compiler/crew/src/PConfigIni.test.cpp b/compiler/crew/src/PConfigIni.test.cpp
index bdd2ccc1f..c062c6937 100644
--- a/compiler/crew/src/PConfigIni.test.cpp
+++ b/compiler/crew/src/PConfigIni.test.cpp
@@ -17,12 +17,14 @@
#include "crew/PConfigIni.h"
#include "crew/PConfigIniDump.h"
+#include <foder/FileLoader.h>
+
#include <gtest/gtest.h>
#include <sstream>
#include <stdexcept>
-TEST(ConfigIniTest, read_ini_non_exist_file)
+TEST(ConfigIniTest, read_ini_non_exist_file_NEG)
{
EXPECT_THROW(crew::read_ini("/hello/world/not_a_file"), std::runtime_error);
}
@@ -85,3 +87,60 @@ TEST(ConfigIniTest, write_ini_file_error_NEG)
crew::Sections sections;
EXPECT_THROW(crew::write_ini("/abc/def/cannot_access", sections), std::runtime_error);
}
+
+TEST(ConfigIniTest, read_file_escape_semicolon)
+{
+ auto sections = crew::read_ini("test_read_semicolon.ini");
+ ASSERT_EQ(1UL, sections.size());
+
+ auto its = sections.begin();
+ ASSERT_NE(sections.end(), its);
+ EXPECT_TRUE("hello" == its->name);
+ ASSERT_EQ(1UL, its->items.size());
+
+ auto it = its->items.begin();
+ ASSERT_NE(its->items.end(), it);
+
+ EXPECT_TRUE("keya;keyb;keyc;keyd" == it->first);
+ EXPECT_TRUE("world" == it->second);
+}
+
+TEST(ConfigIniTest, write_file_escape_semicolon)
+{
+ std::string path("test_write_semicolon.ini");
+
+ // save key with ';'
+ {
+ crew::Sections sections;
+ crew::Section hello;
+ hello.name = "hello";
+ hello.items["keya;keyb;keyc;keyd"] = "world";
+ sections.push_back(hello);
+ crew::write_ini(path, sections);
+ }
+
+ // load the file and check if there is '\\'
+ std::string strbuffer;
+ {
+ foder::FileLoader file_loader{path};
+ auto ini_data = file_loader.load();
+
+ auto buffer = std::vector<char>();
+ auto length = ini_data.size();
+ buffer.reserve(length + 1);
+
+ char *pbuffer = buffer.data();
+ memcpy(pbuffer, ini_data.data(), length);
+ *(pbuffer + length) = 0;
+
+ strbuffer = pbuffer;
+ }
+ int32_t count = 0;
+ size_t pos = 0;
+ while ((pos = strbuffer.find("\\;", pos)) != std::string::npos)
+ {
+ count++;
+ pos++;
+ }
+ EXPECT_TRUE(count == 3);
+}
diff --git a/compiler/crew/src/test_read_semicolon.ini b/compiler/crew/src/test_read_semicolon.ini
new file mode 100644
index 000000000..d966fb707
--- /dev/null
+++ b/compiler/crew/src/test_read_semicolon.ini
@@ -0,0 +1,2 @@
+[hello]
+keya\;keyb\;keyc\;keyd=world
diff --git a/compiler/enco/core/src/CppGen/Host.cpp b/compiler/enco/core/src/CppGen/Host.cpp
index 7f9456239..63baf0b31 100644
--- a/compiler/enco/core/src/CppGen/Host.cpp
+++ b/compiler/enco/core/src/CppGen/Host.cpp
@@ -299,7 +299,7 @@ std::unique_ptr<pp::MultiLineText> HostBlockCompiler::compile(const coco::Block
res->append(ins->accept(prn));
}
- return std::move(res);
+ return res;
}
} // namespace enco
diff --git a/compiler/enco/core/src/CppGen/Subnet.cpp b/compiler/enco/core/src/CppGen/Subnet.cpp
index 599b0794e..3fc14edf5 100644
--- a/compiler/enco/core/src/CppGen/Subnet.cpp
+++ b/compiler/enco/core/src/CppGen/Subnet.cpp
@@ -373,7 +373,7 @@ std::unique_ptr<SubnetStruct> SubnetStructBuilder::build(const ANNBinder *binder
// Finalize compilation
res->ctor()->append("ANeuralNetworksCompilation_finish(", cname, ");");
- return std::move(res);
+ return res;
}
std::unique_ptr<pp::MultiLineText> SubnetBlockCompiler::compile(const ANNBinder *binder) const
@@ -415,7 +415,7 @@ std::unique_ptr<pp::MultiLineText> SubnetBlockCompiler::compile(const ANNBinder
res->append("ANeuralNetworksExecution_free(execution);");
- return std::move(res);
+ return res;
}
} // namespace enco
diff --git a/compiler/enco/core/src/Transforms/Split.cpp b/compiler/enco/core/src/Transforms/Split.cpp
index 714c27a72..4bb21b0a7 100644
--- a/compiler/enco/core/src/Transforms/Split.cpp
+++ b/compiler/enco/core/src/Transforms/Split.cpp
@@ -656,7 +656,7 @@ public:
app->ofm(ofm);
app->ker(ker);
- return std::move(app);
+ return app;
}
else
{
@@ -676,7 +676,7 @@ public:
app->ofm(ofm);
app->ker(ker);
- return std::move(app);
+ return app;
}
}
}
@@ -704,7 +704,7 @@ public:
app->right(right);
app->out(out);
- return std::move(app);
+ return app;
}
}
else if (auto op = eval->op()->asMul())
@@ -731,7 +731,7 @@ public:
app->right(right);
app->out(out);
- return std::move(app);
+ return app;
}
}
else if (auto op = eval->op()->asPadF())
@@ -754,7 +754,7 @@ public:
app->ifm(ifm);
app->ofm(ofm);
- return std::move(app);
+ return app;
}
}
else if (auto maxpool = eval->op()->asMaxPool2D())
@@ -779,7 +779,7 @@ public:
app->ifm(ifm);
app->ofm(ofm);
- return std::move(app);
+ return app;
}
}
else if (auto avgpool = eval->op()->asAvgPool2D())
@@ -808,7 +808,7 @@ public:
app->ifm(ifm);
app->ofm(ofm);
- return std::move(app);
+ return app;
}
}
}
@@ -831,7 +831,7 @@ public:
app->ifm(ifm);
app->ofm(ofm);
- return std::move(app);
+ return app;
}
}
else if (auto relu6 = eval->op()->asReLU6())
@@ -853,7 +853,7 @@ public:
app->ifm(ifm);
app->ofm(ofm);
- return std::move(app);
+ return app;
}
}
else if (auto op = eval->op()->asConcatF())
@@ -880,7 +880,7 @@ public:
app->right(right);
app->out(out);
- return std::move(app);
+ return app;
}
}
else if (auto op = eval->op()->asSub())
@@ -907,7 +907,7 @@ public:
app->right(right);
app->out(out);
- return std::move(app);
+ return app;
}
}
else if (auto op = eval->op()->asDiv())
@@ -934,7 +934,7 @@ public:
app->right(right);
app->out(out);
- return std::move(app);
+ return app;
}
}
@@ -967,7 +967,7 @@ std::unique_ptr<ANNOpAppender> make_appender(coco::Instr *ins)
app->left(depth_concat->fst()->asFeature());
app->right(depth_concat->snd()->asFeature());
- return std::move(app);
+ return app;
}
// Build ANN IR from ANNConv2D instruction
@@ -986,7 +986,7 @@ std::unique_ptr<ANNOpAppender> make_appender(coco::Instr *ins)
app->ker(conv2d->ker()->asKernel());
app->bias(coco::safe_cast<coco::FeatureObject>(conv2d->bias()));
- return std::move(app);
+ return app;
}
return nullptr;
diff --git a/compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp b/compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp
index aa2cad705..32ad44385 100644
--- a/compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp
+++ b/compiler/exo/src/Conversion/DepthwiseConv2DConverter.cpp
@@ -25,6 +25,8 @@
#include <loco/Service/TypeInference.h>
#include <loco/Service/ShapeInference.h>
+#include <limits>
+
namespace exo
{
diff --git a/compiler/kuma/src/IntervalSet.h b/compiler/kuma/src/IntervalSet.h
index 3b6c5f666..1e26581c0 100644
--- a/compiler/kuma/src/IntervalSet.h
+++ b/compiler/kuma/src/IntervalSet.h
@@ -17,6 +17,7 @@
#ifndef __KUMA_DETAILS_LIVE_INTERVAL_SET_H__
#define __KUMA_DETAILS_LIVE_INTERVAL_SET_H__
+#include <cstdint>
#include <map>
namespace kuma
diff --git a/compiler/loco/include/loco/IR/DataTypeTraits.h b/compiler/loco/include/loco/IR/DataTypeTraits.h
index 1f78c9fec..6be46c3b3 100644
--- a/compiler/loco/include/loco/IR/DataTypeTraits.h
+++ b/compiler/loco/include/loco/IR/DataTypeTraits.h
@@ -83,6 +83,13 @@ template <> struct DataTypeImpl<DataType::U64>
using Type = uint64_t;
};
+template <> struct DataTypeImpl<DataType::FLOAT16>
+{
+ // float16 type with 16bit value, encoded with help of FP16 library
+ // https://github.com/Maratyszcza/FP16/
+ using Type = uint16_t;
+};
+
template <> struct DataTypeImpl<DataType::FLOAT32>
{
// Use C++ float type for IEEE 32-bit floating-point numbers
@@ -132,6 +139,8 @@ inline uint32_t size(DataType data_type)
return sizeof(DataTypeImpl<DataType::S64>::Type);
case DataType::U64:
return sizeof(DataTypeImpl<DataType::U64>::Type);
+ case DataType::FLOAT16:
+ return sizeof(DataTypeImpl<DataType::FLOAT16>::Type);
case DataType::FLOAT32:
return sizeof(DataTypeImpl<DataType::FLOAT32>::Type);
case DataType::FLOAT64:
diff --git a/compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp b/compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp
index 500f08623..40ddb133b 100644
--- a/compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp
+++ b/compiler/logo/src/Passes/SimplifyDomainConversionPass.cpp
@@ -122,9 +122,6 @@ bool SimplifyDomainConversionPass::run(loco::Graph *g)
{
using namespace loco;
- auto encoder = encode_node->encoder();
- assert(encoder != nullptr);
-
auto decode_node = dynamic_cast<loco::FeatureDecode *>(encode_node->input());
if (decode_node == nullptr)
{
@@ -132,6 +129,9 @@ bool SimplifyDomainConversionPass::run(loco::Graph *g)
}
assert(decode_node->input() != nullptr);
+ auto encoder = encode_node->encoder();
+ assert(encoder != nullptr);
+
auto decoder = decode_node->decoder();
assert(decoder != nullptr);
@@ -302,9 +302,6 @@ bool SimplifyDomainConversionPass::run(loco::Graph *g)
{
using namespace loco;
- auto encoder = encode_node->encoder();
- assert(encoder != nullptr);
-
auto decode_node = dynamic_cast<loco::MatrixDecode *>(encode_node->input());
if (decode_node == nullptr)
{
@@ -312,6 +309,9 @@ bool SimplifyDomainConversionPass::run(loco::Graph *g)
}
assert(decode_node->input() != nullptr);
+ auto encoder = encode_node->encoder();
+ assert(encoder != nullptr);
+
auto decoder = decode_node->decoder();
assert(decoder != nullptr);
diff --git a/compiler/luci-eval-driver/src/EvalDriver.cpp b/compiler/luci-eval-driver/src/EvalDriver.cpp
index 4762cffe7..0ed35431d 100644
--- a/compiler/luci-eval-driver/src/EvalDriver.cpp
+++ b/compiler/luci-eval-driver/src/EvalDriver.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include <luci/Importer.h>
+#include <luci/ImporterEx.h>
#include <luci_interpreter/Interpreter.h>
#include <luci/CircleExporter.h>
#include <luci/CircleFileExpContract.h>
@@ -47,18 +47,6 @@ void writeDataToFile(const std::string &filename, const char *data, size_t data_
}
}
-std::unique_ptr<luci::Module> importModel(const std::string &filename)
-{
- std::ifstream fs(filename, std::ifstream::binary);
- if (fs.fail())
- {
- throw std::runtime_error("Cannot open model file \"" + filename + "\".\n");
- }
- std::vector<char> model_data((std::istreambuf_iterator<char>(fs)),
- std::istreambuf_iterator<char>());
- return luci::Importer().importModule(circle::GetModel(model_data.data()));
-}
-
template <typename NodeT> size_t getTensorSize(const NodeT *node)
{
uint32_t tensor_size = loco::size(node->dtype());
@@ -91,7 +79,8 @@ int entry(int argc, char **argv)
const char *output_file = argv[4];
// Load model from the file
- std::unique_ptr<luci::Module> module = importModel(filename);
+ luci::ImporterEx importer;
+ std::unique_ptr<luci::Module> module = importer.importVerifyModule(filename);
if (module == nullptr)
{
std::cerr << "ERROR: Failed to load '" << filename << "'" << std::endl;
diff --git a/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst b/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
index d134a6b95..f0df58db3 100644
--- a/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
+++ b/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
@@ -12,6 +12,7 @@ REGISTER_KERNEL(Div)
REGISTER_KERNEL(Elu)
REGISTER_KERNEL(Exp)
REGISTER_KERNEL(ExpandDims)
+REGISTER_KERNEL(Fill)
REGISTER_KERNEL(Floor)
REGISTER_KERNEL(FloorDiv)
REGISTER_KERNEL(Equal)
@@ -44,6 +45,7 @@ REGISTER_KERNEL(Reshape)
REGISTER_KERNEL(ResizeBilinear)
REGISTER_KERNEL(ResizeNearestNeighbor)
REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Shape)
REGISTER_KERNEL(Softmax)
REGISTER_KERNEL(SpaceToBatchND)
REGISTER_KERNEL(SpaceToDepth)
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h b/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h
index 15ff0327b..efa6b167e 100644
--- a/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h
@@ -18,7 +18,7 @@
#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
-#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+#include "PALreference_ops.h"
namespace luci_interpreter_pal
{
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h b/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h
index 6046789ae..effb85d54 100644
--- a/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h
@@ -17,7 +17,7 @@
#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
#define LUCI_INTERPRETER_PAL_QUANTIZE_H
-#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+#include "PALreference_ops.h"
namespace luci_interpreter_pal
{
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALreference_ops.h b/compiler/luci-interpreter/pal/cmsisnn/PALreference_ops.h
new file mode 100644
index 000000000..813b1ec2c
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALreference_ops.h
@@ -0,0 +1,1568 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+#define LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <type_traits>
+
+#include "third_party/eigen3/Eigen/Core"
+#include "fixedpoint/fixedpoint.h"
+#include "ruy/profiler/instrumentation.h" // from @ruy
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/add.h"
+#include "tensorflow/lite/kernels/internal/reference/add_n.h"
+#include "tensorflow/lite/kernels/internal/reference/arg_min_max.h"
+#include "tensorflow/lite/kernels/internal/reference/batch_matmul.h"
+#include "tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h"
+#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
+#include "tensorflow/lite/kernels/internal/reference/cast.h"
+#include "tensorflow/lite/kernels/internal/reference/ceil.h"
+#include "tensorflow/lite/kernels/internal/reference/comparisons.h"
+#include "tensorflow/lite/kernels/internal/reference/concatenation.h"
+#include "tensorflow/lite/kernels/internal/reference/conv.h"
+#include "tensorflow/lite/kernels/internal/reference/depth_to_space.h"
+#include "tensorflow/lite/kernels/internal/reference/dequantize.h"
+#include "tensorflow/lite/kernels/internal/reference/div.h"
+#include "tensorflow/lite/kernels/internal/reference/elu.h"
+#include "tensorflow/lite/kernels/internal/reference/exp.h"
+#include "tensorflow/lite/kernels/internal/reference/fill.h"
+#include "tensorflow/lite/kernels/internal/reference/floor.h"
+#include "tensorflow/lite/kernels/internal/reference/floor_div.h"
+#include "tensorflow/lite/kernels/internal/reference/floor_mod.h"
+#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
+#include "tensorflow/lite/kernels/internal/reference/gather.h"
+#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"
+#include "tensorflow/lite/kernels/internal/reference/l2normalization.h"
+#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h"
+#include "tensorflow/lite/kernels/internal/reference/log_softmax.h"
+#include "tensorflow/lite/kernels/internal/reference/logistic.h"
+#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h"
+#include "tensorflow/lite/kernels/internal/reference/mul.h"
+#include "tensorflow/lite/kernels/internal/reference/neg.h"
+#include "tensorflow/lite/kernels/internal/reference/pad.h"
+#include "tensorflow/lite/kernels/internal/reference/pooling.h"
+#include "tensorflow/lite/kernels/internal/reference/prelu.h"
+#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "tensorflow/lite/kernels/internal/reference/quantize.h"
+#include "tensorflow/lite/kernels/internal/reference/reduce.h"
+#include "tensorflow/lite/kernels/internal/reference/requantize.h"
+#include "tensorflow/lite/kernels/internal/reference/resize_bilinear.h"
+#include "tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h"
+#include "tensorflow/lite/kernels/internal/reference/round.h"
+#include "tensorflow/lite/kernels/internal/reference/softmax.h"
+#include "tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h"
+#include "tensorflow/lite/kernels/internal/reference/space_to_depth.h"
+#include "tensorflow/lite/kernels/internal/reference/strided_slice.h"
+#include "tensorflow/lite/kernels/internal/reference/string_comparisons.h"
+#include "tensorflow/lite/kernels/internal/reference/sub.h"
+#include "tensorflow/lite/kernels/internal/reference/tanh.h"
+#include "tensorflow/lite/kernels/internal/reference/transpose.h"
+#include "tensorflow/lite/kernels/internal/reference/transpose_conv.h"
+#include "tensorflow/lite/kernels/internal/strided_slice_logic.h"
+#include "tensorflow/lite/kernels/internal/tensor.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+namespace tflite
+{
+
+namespace reference_ops
+{
+
+template <typename T>
+inline void Relu(const RuntimeShape &input_shape, const T *input_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const T val = input_data[i];
+ const T lower = 0;
+ const T clamped = val < lower ? lower : val;
+ output_data[i] = clamped;
+ }
+}
+
+template <typename T>
+inline void Relu1(const RuntimeShape &input_shape, const T *input_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ ruy::profiler::ScopeLabel label("Relu1 (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const T val = input_data[i];
+ const T upper = 1;
+ const T lower = -1;
+ const T clamped = val > upper ? upper : val < lower ? lower : val;
+ output_data[i] = clamped;
+ }
+}
+
+inline void Relu6(const RuntimeShape &input_shape, const float *input_data,
+ const RuntimeShape &output_shape, float *output_data)
+{
+ ruy::profiler::ScopeLabel label("Relu6 (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const float val = input_data[i];
+ const float upper = 6;
+ const float lower = 0;
+ const float clamped = val > upper ? upper : val < lower ? lower : val;
+ output_data[i] = clamped;
+ }
+}
+
+template <typename T>
+inline void ReluX(const tflite::ReluParams &params, const RuntimeShape &input_shape,
+ const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+ ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const int32 val = static_cast<int32_t>(input_data[i]);
+ int32 clamped = params.output_offset + MultiplyByQuantizedMultiplier(val - params.input_offset,
+ params.output_multiplier,
+ params.output_shift);
+ clamped = std::max(params.quantized_activation_min, clamped);
+ clamped = std::min(params.quantized_activation_max, clamped);
+ output_data[i] = static_cast<T>(clamped);
+ }
+}
+
+template <typename T>
+inline void ReluX(const tflite::ActivationParams &params, const RuntimeShape &input_shape,
+ const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+ ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ const T max_value = params.quantized_activation_max;
+ const T min_value = params.quantized_activation_min;
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const T val = input_data[i];
+ const T clamped = val > max_value ? max_value : val < min_value ? min_value : val;
+ output_data[i] = clamped;
+ }
+}
+
+// TODO(jiawen): We can implement BroadcastMul on buffers of arbitrary
+// dimensionality if the runtime code does a single loop over one dimension
+// that handles broadcasting as the base case. The code generator would then
+// generate max(D1, D2) nested for loops.
+inline void BroadcastMulFivefold(const ArithmeticParams &unswitched_params,
+ const RuntimeShape &unswitched_input1_shape,
+ const uint8 *unswitched_input1_data,
+ const RuntimeShape &unswitched_input2_shape,
+ const uint8 *unswitched_input2_data,
+ const RuntimeShape &output_shape, uint8 *output_data)
+{
+ ArithmeticParams switched_params = unswitched_params;
+ switched_params.input1_offset = unswitched_params.input2_offset;
+ switched_params.input2_offset = unswitched_params.input1_offset;
+
+ const bool use_unswitched = unswitched_params.broadcast_category ==
+ tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast;
+
+ const ArithmeticParams &params = use_unswitched ? unswitched_params : switched_params;
+ const uint8 *input1_data = use_unswitched ? unswitched_input1_data : unswitched_input2_data;
+ const uint8 *input2_data = use_unswitched ? unswitched_input2_data : unswitched_input1_data;
+
+ // Fivefold nested loops. The second input resets its position for each
+ // iteration of the second loop. The first input resets its position at the
+ // beginning of the fourth loop. The innermost loop is an elementwise Mul of
+ // sections of the arrays.
+ uint8 *output_data_ptr = output_data;
+ const uint8 *input1_data_ptr = input1_data;
+ const uint8 *input2_data_reset = input2_data;
+ int y0 = params.broadcast_shape[0];
+ int y1 = params.broadcast_shape[1];
+ int y2 = params.broadcast_shape[2];
+ int y3 = params.broadcast_shape[3];
+ int y4 = params.broadcast_shape[4];
+ for (int i0 = 0; i0 < y0; ++i0)
+ {
+ const uint8 *input2_data_ptr;
+ for (int i1 = 0; i1 < y1; ++i1)
+ {
+ input2_data_ptr = input2_data_reset;
+ for (int i2 = 0; i2 < y2; ++i2)
+ {
+ for (int i3 = 0; i3 < y3; ++i3)
+ {
+ MulElementwise(y4, params, input1_data_ptr, input2_data_ptr, output_data_ptr);
+ input2_data_ptr += y4;
+ output_data_ptr += y4;
+ }
+ input1_data_ptr += y4;
+ }
+ }
+ input2_data_reset = input2_data_ptr;
+ }
+}
+
+inline void Mul(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+ const int16 *input1_data, const RuntimeShape &input2_shape,
+ const int16 *input2_data, const RuntimeShape &output_shape, int16 *output_data)
+{
+ ruy::profiler::ScopeLabel label("Mul/Int16");
+
+ const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+ for (int i = 0; i < flat_size; i++)
+ {
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+
+ F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
+ output_data[i] = unclamped_result.raw();
+ }
+}
+
+inline void Mul(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+ const int16 *input1_data, const RuntimeShape &input2_shape,
+ const int16 *input2_data, const RuntimeShape &output_shape, uint8 *output_data)
+{
+ ruy::profiler::ScopeLabel label("Mul/Int16Uint8");
+ int32 output_offset = params.output_offset;
+ int32 output_activation_min = params.quantized_activation_min;
+ int32 output_activation_max = params.quantized_activation_max;
+ TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+
+ const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+ for (int i = 0; i < flat_size; i++)
+ {
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+
+ F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
+ int16 rescaled_result = gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
+ int16 clamped_result = std::min<int16>(output_activation_max - output_offset, rescaled_result);
+ clamped_result = std::max<int16>(output_activation_min - output_offset, clamped_result);
+ output_data[i] = output_offset + clamped_result;
+ }
+}
+
+inline void Sub16(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+ const int16_t *input1_data, const RuntimeShape &input2_shape,
+ const int16_t *input2_data, const RuntimeShape &output_shape,
+ int16_t *output_data)
+{
+ ruy::profiler::ScopeLabel label("Sub/Int16");
+ const int input1_shift = params.input1_shift;
+ const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+ const int16 output_activation_min = params.quantized_activation_min;
+ const int16 output_activation_max = params.quantized_activation_max;
+
+ TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
+ TFLITE_DCHECK_LE(input1_shift, 0);
+ TFLITE_DCHECK_LE(params.input2_shift, 0);
+ const int16 *not_shift_input = input1_shift == 0 ? input1_data : input2_data;
+ const int16 *shift_input = input1_shift == 0 ? input2_data : input1_data;
+ const int input_right_shift = input1_shift == 0 ? -params.input2_shift : -input1_shift;
+
+ if (input1_shift == 0)
+ {
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+ for (int i = 0; i < flat_size; ++i)
+ {
+ F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
+ F0 scaled_input =
+ F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
+ F0 result = SaturatingSub(input_ready_scaled, scaled_input);
+ const int16 raw_output = result.raw();
+ const int16 clamped_output =
+ std::min(output_activation_max, std::max(output_activation_min, raw_output));
+ output_data[i] = clamped_output;
+ }
+ }
+ else
+ {
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+ for (int i = 0; i < flat_size; ++i)
+ {
+ F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
+ F0 scaled_input =
+ F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
+ F0 result = SaturatingSub(scaled_input, input_ready_scaled);
+ const int16 raw_output = result.raw();
+ const int16 clamped_output =
+ std::min(output_activation_max, std::max(output_activation_min, raw_output));
+ output_data[i] = clamped_output;
+ }
+ }
+}
+
+template <typename Scalar>
+void Pack(const PackParams &params, const RuntimeShape *const *input_shapes,
+ const Scalar *const *input_data, const RuntimeShape &output_shape, Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("Pack");
+ const int dimensions = output_shape.DimensionsCount();
+ int axis = params.axis;
+ int inputs_count = params.inputs_count;
+
+ int outer_size = 1;
+ for (int i = 0; i < axis; i++)
+ {
+ outer_size *= output_shape.Dims(i);
+ }
+ int copy_size = 1;
+ for (int i = params.axis + 1; i < dimensions; i++)
+ {
+ copy_size *= output_shape.Dims(i);
+ }
+ TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
+ for (int i = 0; i < inputs_count; ++i)
+ {
+ for (int k = 0; k < outer_size; k++)
+ {
+ const Scalar *input_ptr = input_data[i] + copy_size * k;
+ int loc = k * inputs_count * copy_size + i * copy_size;
+ memcpy(output_data + loc, input_ptr, copy_size * sizeof(Scalar));
+ }
+ }
+}
+
+template <typename Scalar>
+void Unpack(const UnpackParams &params, const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape &output_shape, Scalar *const *output_datas)
+{
+ ruy::profiler::ScopeLabel label("Unpack");
+ const int dimensions = input_shape.DimensionsCount();
+ const int outputs_count = params.num_split;
+
+ int outer_size = 1;
+ int axis = params.axis;
+ if (axis < 0)
+ {
+ axis += dimensions;
+ }
+ TFLITE_DCHECK_GE(axis, 0);
+ TFLITE_DCHECK_LT(axis, dimensions);
+ for (int i = 0; i < axis; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+ int copy_size = 1;
+ for (int i = axis + 1; i < dimensions; ++i)
+ {
+ copy_size *= input_shape.Dims(i);
+ }
+ TFLITE_DCHECK_EQ(output_shape.FlatSize(), copy_size * outer_size);
+
+ for (int i = 0; i < outputs_count; ++i)
+ {
+ for (int k = 0; k < outer_size; k++)
+ {
+ Scalar *output_ptr = output_datas[i] + copy_size * k;
+ int loc = k * outputs_count * copy_size + i * copy_size;
+ memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
+ }
+ }
+}
+
+template <typename Scalar>
+void PackWithScaling(const PackParams &params, const RuntimeShape *const *input_shapes,
+ const uint8 *const *input_data, const RuntimeShape &output_shape,
+ uint8 *output_data)
+{
+ ruy::profiler::ScopeLabel label("PackWithScaling");
+ const int dimensions = output_shape.DimensionsCount();
+ int axis = params.axis;
+ const int32 *input_zeropoint = params.input_zeropoint;
+ const float *input_scale = params.input_scale;
+ int inputs_count = params.inputs_count;
+ const int32 output_zeropoint = params.output_zeropoint;
+ const float output_scale = params.output_scale;
+
+ int outer_size = 1;
+ for (int i = 0; i < axis; i++)
+ {
+ outer_size *= output_shape.Dims(i);
+ }
+ int copy_size = 1;
+ for (int i = axis + 1; i < dimensions; i++)
+ {
+ copy_size *= output_shape.Dims(i);
+ }
+ TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
+ Scalar *output_ptr = output_data;
+ const float inverse_output_scale = 1.f / output_scale;
+ for (int k = 0; k < outer_size; k++)
+ {
+ for (int i = 0; i < inputs_count; ++i)
+ {
+ if (input_zeropoint[i] == output_zeropoint && input_scale[i] == output_scale)
+ {
+ memcpy(output_ptr, input_data[i] + k * copy_size, copy_size * sizeof(Scalar));
+ }
+ else
+ {
+ assert(false);
+ const float scale = input_scale[i] * inverse_output_scale;
+ const float bias = -input_zeropoint[i] * scale;
+ auto input_ptr = input_data[i];
+ for (int j = 0; j < copy_size; ++j)
+ {
+ const int value =
+ static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
+ output_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, value), 0));
+ }
+ }
+ output_ptr += copy_size;
+ }
+ }
+}
+
+template <typename Scalar>
+void DepthConcatenation(const ConcatenationParams &params, const RuntimeShape *const *input_shapes,
+ const Scalar *const *input_data, const RuntimeShape &output_shape,
+ Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("DepthConcatenation");
+ auto params_copy = params;
+ params_copy.axis = 3;
+ Concatenation(params_copy, input_shapes, input_data, output_shape, output_data);
+}
+
+inline void LstmCell(const LstmCellParams &params, const RuntimeShape &unextended_input_shape,
+ const float *input_data, const RuntimeShape &unextended_prev_activ_shape,
+ const float *prev_activ_data, const RuntimeShape &weights_shape,
+ const float *weights_data, const RuntimeShape &unextended_bias_shape,
+ const float *bias_data, const RuntimeShape &unextended_prev_state_shape,
+ const float *prev_state_data,
+ const RuntimeShape &unextended_output_state_shape, float *output_state_data,
+ const RuntimeShape &unextended_output_activ_shape, float *output_activ_data,
+ const RuntimeShape &unextended_concat_temp_shape, float *concat_temp_data,
+ const RuntimeShape &unextended_activ_temp_shape, float *activ_temp_data)
+{
+ TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+ const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
+ const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+ const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+ const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+ const RuntimeShape output_state_shape =
+ RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+ const RuntimeShape output_activ_shape =
+ RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+ const RuntimeShape concat_temp_shape =
+ RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+ const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+ TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+
+ const int weights_dim_count = weights_shape.DimensionsCount();
+ const int batches = MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0,
+ output_state_shape, 0, output_activ_shape, 0);
+ const int height = MatchingDim(input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1,
+ output_state_shape, 1, output_activ_shape, 1);
+ const int width = MatchingDim(input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2,
+ output_state_shape, 2, output_activ_shape, 2);
+ const int input_depth = input_shape.Dims(3);
+ const int prev_activ_depth = prev_activ_shape.Dims(3);
+ const int total_input_depth = prev_activ_depth + input_depth;
+ TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
+ TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
+ const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+ TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
+ TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
+ const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+ 3, output_activ_shape, 3);
+ TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
+
+ // Concatenate prev_activ and input data together
+ std::vector<float const *> concat_input_arrays_data;
+ std::vector<RuntimeShape const *> concat_input_arrays_shapes;
+ concat_input_arrays_data.push_back(input_data);
+ concat_input_arrays_data.push_back(prev_activ_data);
+ concat_input_arrays_shapes.push_back(&input_shape);
+ concat_input_arrays_shapes.push_back(&prev_activ_shape);
+ tflite::ConcatenationParams concat_params;
+ concat_params.axis = 3;
+ concat_params.inputs_count = concat_input_arrays_data.size();
+ Concatenation(concat_params, &(concat_input_arrays_shapes[0]), &(concat_input_arrays_data[0]),
+ concat_temp_shape, concat_temp_data);
+
+ // Fully connected
+ tflite::FullyConnectedParams fc_params;
+ fc_params.float_activation_min = std::numeric_limits<float>::lowest();
+ fc_params.float_activation_max = std::numeric_limits<float>::max();
+ FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape, weights_data,
+ bias_shape, bias_data, activ_temp_shape, activ_temp_data);
+
+ // Memory state update (the LSTM "guts")
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int w = 0; w < width; ++w)
+ {
+ for (int h = 0; h < height; ++h)
+ {
+ for (int c = 0; c < output_depth; ++c)
+ {
+ const float input_gate =
+ 1.f /
+ (1.f +
+ std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 0 * output_depth + c)]));
+ const float new_input =
+ std::tanh(activ_temp_data[Offset(activ_temp_shape, b, h, w, 1 * output_depth + c)]);
+ const float forget_gate =
+ 1.f /
+ (1.f +
+ std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 2 * output_depth + c)]));
+ const float output_gate =
+ 1.f /
+ (1.f +
+ std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 3 * output_depth + c)]));
+ const float new_state =
+ input_gate * new_input +
+ forget_gate * prev_state_data[Offset(prev_state_shape, b, h, w, c)];
+ output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state;
+ output_activ_data[Offset(output_activ_shape, b, h, w, c)] =
+ output_gate * std::tanh(new_state);
+ }
+ }
+ }
+ }
+}
+
+// Quantized LSTM cell implementation.
+// The quantization of the input, output arrays is as follows:
+// - The input activations are quantized as uint8 on the interval
+// [-1, 127/128].
// The rationale for that is that this is the natural interval for output
+// activations (see next point) and these need to be concatenated together.
+// We could accommodate different ranges by re-scaling, but we empirically
+// found that setting the input activations range to be [-1, 127/128] in the
+// first place, removing the need for re-scaling, greatly improves accuracy.
+// - The output activations are quantized as uint8 on the interval
+// [-1, 127/128].
+// The rationale for that is that the definition of a LSTM cell makes them
+// intrinsically constrained in [-1, 1]; tweaking that to [-1, 127/128]
+// makes for simpler, more accurate fixed-point arithmetic.
+// - The output-at-previous-timestep state array is obviously quantized as
+// the output activations.
+// - The internal LSTM memory (not the output-at-previous-timestep, the other
+// internal state array) is int16-quantized and may use any power-of-two,
+// symmetric range i.e. [-2^N, 2^N * 32767/32768] for any N, which we call
+// StateIntegerBits below, see the below discussion of that template
+// parameter ("The StateIntegerBits template parameter").
+// - The output of the internal fully-connected node is int16-quantized
+// on the interval [-8, 8 * 32767/32768], the rationale for which is
+// explained just below ("Why [-8, 8] for fully-connected output?").
+//
+//
+// === The StateIntegerBits template parameter ===
+//
+// The StateIntegerBits template parameter controls the fixed-point format used
+// to represent the internal memory of the LSTM cell (not the
+// output-at-previous-timestep, the other internal state array). It's currently
+// a template parameter so that the model can control that. The most typical
+// value for StateIntegerBits is 4. Other plausible values are anywhere between
+// 3 and 5. We might eventually standardize on a single supported value, e.g. 4,
+// and drop that template parameter. The reason why it can't be a runtime
+// parameter is that this controls the fixed-point format used, i.e. we need to
+// generate actually different code based on it. In particular, we generate code
+// for a fixed-point tanh() implementation for that format, which internally
+// uses a fixed-point exp() implementation, which internally uses a
+// barrel-shifter with a number of steps that depends on StateIntegerBits.
+// Another consequence of that is that a higher value of StateIntegerBits
+// results in a more expensive implementation (more barrel shifter steps
+// needed).
+//
+//
+// === Why [-8, 8] for fully-connected output? ===
+//
+// This array is only fed to Logistic and Tanh functions, for which
+// the quantized implementation will want to use fixed-point arithmetic,
+// requiring a power-of-two representation interval. Thus, we should right
+// away quantize this array to a power-of-two interval; otherwise,
+// implementation will need to rescale that, losing any benefit that a tighter
+// representation interval might otherwise yield, while introducing some
+// numerical error and computational overhead.
+//
+// Now, Logistic and Tanh
+// are nearly constant (nearly equal to their horizontal asymptotes)
+// outside of a small bounded interval around 0:
+//
+// Logistic(4) = 1 - 1.8e-2 Tanh(4) = 1 - 6.7e-4
+// Logistic(8) = 1 - 3.4e-4 Tanh(8) = 1 - 2.3e-7
+// Logistic(16) = 1 - 1.1e-7 Tanh(16) = 1 - 2.5e-14
+//
+// From this, we see that clamping to [-4, 4] would be too inaccurate
+// (the error of 1.8e-2 on Logistic would be felt even in 8bit precision)
+// while clamping to [-16, 16] would make no difference even in float32.
+// However, for a fixed-point implementation in 16-bit integers, using 5
+// integer bits to represent the [-16, 16] range would leave only 11
+// fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive
+// representable values. Notice that is higher than the
+// worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic.
+// Using [-8, 8] thus seems like the better compromise overall, enjoying
+// an increment of 2.4e-4 between representable values and a worst-case
+// clamping error of 3.4e-4, both better than the increment of 4.9e-4 with
+// [-16, 16].
+//
+// Moreover, all other things being equal, it is nice to choose the narrower
+// representation range, as that makes the implementation of fixed-point
+// math functions a little cheaper (each integer bit requires an additional
// barrel-shifter step in the implementation of exp(-x)). That is further
+// reason to prefer [-8, 8] over [-16, 16]. The choice of [-16, 16] would make
+// sense for 32-bit float or 32-bit fixed-point quantization, but we are
+// aiming for 16-bit fixed-point quantization of these internal nodes here.
+//
template <int StateIntegerBits>
inline void
LstmCell(const LstmCellParams &params, const RuntimeShape &unextended_input_shape,
         const uint8 *input_data_uint8, const RuntimeShape &unextended_prev_activ_shape,
         const uint8 *prev_activ_data_uint8, const RuntimeShape &weights_shape,
         const uint8 *weights_data_uint8, const RuntimeShape &unextended_bias_shape,
         const int32 *bias_data_int32, const RuntimeShape &unextended_prev_state_shape,
         const int16 *prev_state_data_int16, const RuntimeShape &unextended_output_state_shape,
         int16 *output_state_data_int16, const RuntimeShape &unextended_output_activ_shape,
         uint8 *output_activ_data_uint8, const RuntimeShape &unextended_concat_temp_shape,
         uint8 *concat_temp_data_uint8, const RuntimeShape &unextended_activ_temp_shape,
         int16 *activ_temp_data_int16, void *gemmlowp_context)
{
  (void)gemmlowp_context; // only used in optimized code.
  int32 weights_zero_point = params.weights_zero_point;
  int32 accum_multiplier = params.accum_multiplier;
  int accum_shift = params.accum_shift;
  // Shapes may be given with rank <= 4; normalize all of them to rank 4
  // before indexing.
  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
  const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
  const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
  const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
  const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
  const RuntimeShape output_state_shape =
    RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
  const RuntimeShape output_activ_shape =
    RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
  const RuntimeShape concat_temp_shape =
    RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
  const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);

  // Gather dimensions information, and perform consistency checks.
  const int weights_dim_count = weights_shape.DimensionsCount();
  const int outer_size = MatchingFlatSizeSkipDim(input_shape, 3, prev_activ_shape, prev_state_shape,
                                                 output_state_shape, output_activ_shape);
  const int input_depth = input_shape.Dims(3);
  const int prev_activ_depth = prev_activ_shape.Dims(3);
  const int total_input_depth = prev_activ_depth + input_depth;
  TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
  const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
  TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
  TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
  // The internal activations feed 4 gates, hence a multiple of 4.
  TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
  const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
                                       3, output_activ_shape, 3);
  TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
  const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3);
  const int fc_output_depth =
    MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3);
  const int fc_accum_depth = total_input_depth;
  TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth);

  // Depth-concatenate prev_activ and input data together.
  uint8 const *concat_input_arrays_data[2] = {input_data_uint8, prev_activ_data_uint8};
  const RuntimeShape *concat_input_arrays_shapes[2] = {&input_shape, &prev_activ_shape};
  tflite::ConcatenationParams concat_params;
  concat_params.axis = 3;
  concat_params.inputs_count = 2;
  Concatenation(concat_params, concat_input_arrays_shapes, concat_input_arrays_data,
                concat_temp_shape, concat_temp_data_uint8);

  // Implementation of the fully connected node inside the LSTM cell.
  // The operands are 8-bit integers, the accumulators are internally 32bit
  // integers, and the output is 16-bit fixed-point with 3 integer bits so
  // the output range is [-2^3, 2^3] == [-8, 8]. The rationale for that
  // is explained in the function comment above.
  for (int b = 0; b < fc_batches; ++b)
  {
    for (int out_c = 0; out_c < fc_output_depth; ++out_c)
    {
      // Internal accumulation.
      // Initialize accumulator with the bias-value.
      int32 accum = bias_data_int32[out_c];
      // Accumulation loop.
      for (int d = 0; d < fc_accum_depth; ++d)
      {
        // Inputs are uint8 with a fixed zero point of 128 (see the function
        // comment: range [-1, 127/128]); weights use the runtime-provided
        // weights_zero_point.
        int16 input_val = concat_temp_data_uint8[b * fc_accum_depth + d] - 128;
        int16 weights_val = weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point;
        accum += input_val * weights_val;
      }
      // Down-scale the final int32 accumulator to the scale used by our
      // (16-bit, using 3 integer bits) fixed-point format. The quantized
      // multiplier and shift here have been pre-computed offline
      // (e.g. by toco).
      accum = MultiplyByQuantizedMultiplier(accum, accum_multiplier, accum_shift);
      // Saturate, cast to int16, and store to the temporary activations array.
      accum = std::max(-32768, std::min(32767, static_cast<int>(accum)));
      activ_temp_data_int16[out_c + fc_output_depth * b] = accum;
    }
  }

  // Rest of the LSTM cell: tanh and logistic math functions, and some adds
  // and muls, all done in 16-bit fixed-point.
  for (int b = 0; b < outer_size; ++b)
  {
    for (int c = 0; c < output_depth; ++c)
    {
      // Define the fixed-point data types that we will use here. All use
      // int16 as the underlying integer type i.e. all are 16-bit fixed-point.
      // They only differ by the number of integral vs. fractional bits,
      // determining the range of values that they can represent.
      //
      // F0 uses 0 integer bits, range [-1, 1].
      // This is the return type of math functions such as tanh, logistic,
      // whose range is in [-1, 1].
      using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
      // F3 uses 3 integer bits, range [-8, 8].
      // This is the range of the previous fully-connected node's output,
      // which is our input here.
      using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
      // FS uses StateIntegerBits integer bits, range [-2^StateIntegerBits,
      // 2^StateIntegerBits]. It's used to represent the internal state, whose
      // number of integer bits is currently dictated by the model. See comment
      // on the StateIntegerBits template parameter above.
      using FS = gemmlowp::FixedPoint<std::int16_t, StateIntegerBits>;
      // The four gates are laid out contiguously along the last dimension of
      // activ_temp, in order: input, input-modulation, forget, output.
      // Implementation of input gate, using fixed-point logistic function.
      F3 input_gate_input =
        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 0 * output_depth + c]);
      F0 input_gate_output = gemmlowp::logistic(input_gate_input);
      // Implementation of input modulation gate, using fixed-point tanh
      // function.
      F3 input_modulation_gate_input =
        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 1 * output_depth + c]);
      F0 input_modulation_gate_output = gemmlowp::tanh(input_modulation_gate_input);
      // Implementation of forget gate, using fixed-point logistic function.
      F3 forget_gate_input =
        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 2 * output_depth + c]);
      F0 forget_gate_output = gemmlowp::logistic(forget_gate_input);
      // Implementation of output gate, using fixed-point logistic function.
      F3 output_gate_input =
        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 3 * output_depth + c]);
      F0 output_gate_output = gemmlowp::logistic(output_gate_input);
      // Implementation of internal multiplication nodes, still in fixed-point.
      F0 input_times_input_modulation = input_gate_output * input_modulation_gate_output;
      FS prev_state = FS::FromRaw(prev_state_data_int16[b * output_depth + c]);
      FS prev_state_times_forget_state = forget_gate_output * prev_state;
      // Implementation of internal addition node, saturating.
      FS new_state =
        gemmlowp::SaturatingAdd(gemmlowp::Rescale<StateIntegerBits>(input_times_input_modulation),
                                prev_state_times_forget_state);
      // Implementation of last internal Tanh node, still in fixed-point.
      // Since a Tanh fixed-point implementation is specialized for a given
      // number or integer bits, and each specialization can have a substantial
      // code size, and we already used above a Tanh on an input with 3 integer
      // bits, and per the table in the above function comment there is no
      // significant accuracy to be lost by clamping to [-8, +8] for a
      // 3-integer-bits representation, let us just do that. This helps people
      // porting this to targets where code footprint must be minimized.
      F3 new_state_f3 = gemmlowp::Rescale<3>(new_state);
      F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3);
      // Store the new internal state back to memory, as 16-bit integers.
      // Note: here we store the original value with StateIntegerBits, not
      // the rescaled 3-integer-bits value fed to tanh.
      output_state_data_int16[b * output_depth + c] = new_state.raw();
      // Down-scale the output activations to 8-bit integers, saturating,
      // and store back to memory.
      // F0 raw values carry 15 fractional bits; dropping 8 of them maps the
      // [-1, 1) range onto [-128, 128), then clamp and re-center at 128.
      int16 rescaled_output_activ = gemmlowp::RoundingDivideByPOT(output_activ_int16.raw(), 8);
      int16 clamped_output_activ =
        std::max<int16>(-128, std::min<int16>(127, rescaled_output_activ));
      output_activ_data_uint8[b * output_depth + c] = 128 + clamped_output_activ;
    }
  }
}
+
+template <typename Scalar>
+void Split(const SplitParams &params, const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape *const *output_shapes, Scalar *const *output_data)
+{
+ ruy::profiler::ScopeLabel label("Split");
+ const int split_dimensions = input_shape.DimensionsCount();
+ int axis = params.axis < 0 ? params.axis + split_dimensions : params.axis;
+ int outputs_count = params.num_split;
+ TFLITE_DCHECK_LT(axis, split_dimensions);
+
+ int64_t split_size = 0;
+ for (int i = 0; i < outputs_count; i++)
+ {
+ TFLITE_DCHECK_EQ(output_shapes[i]->DimensionsCount(), split_dimensions);
+ for (int j = 0; j < split_dimensions; j++)
+ {
+ if (j != axis)
+ {
+ MatchingDim(*output_shapes[i], j, input_shape, j);
+ }
+ }
+ split_size += output_shapes[i]->Dims(axis);
+ }
+ TFLITE_DCHECK_EQ(split_size, input_shape.Dims(axis));
+ int64_t outer_size = 1;
+ for (int i = 0; i < axis; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+ // For all output arrays,
+ // FlatSize() = outer_size * Dims(axis) * base_inner_size;
+ int64_t base_inner_size = 1;
+ for (int i = axis + 1; i < split_dimensions; ++i)
+ {
+ base_inner_size *= input_shape.Dims(i);
+ }
+
+ const Scalar *input_ptr = input_data;
+ for (int k = 0; k < outer_size; k++)
+ {
+ for (int i = 0; i < outputs_count; ++i)
+ {
+ const int copy_size = output_shapes[i]->Dims(axis) * base_inner_size;
+ memcpy(output_data[i] + k * copy_size, input_ptr, copy_size * sizeof(Scalar));
+ input_ptr += copy_size;
+ }
+ }
+}
+
// Flattens a (batch, row, col) coordinate into a linear node index for data
// laid out as [batches, height, width, ...], row-major.
inline int NodeOffset(int b, int h, int w, int height, int width)
{
  const int row_index = b * height + h;
  return row_index * width + w;
}
+
// Local response normalization over the innermost (depth) axis:
//   output[c] = input[c] * (bias + alpha * sum_{n in window} input[n]^2)^(-beta)
inline void LocalResponseNormalization(const tflite::LocalResponseNormalizationParams &op_params,
                                       const RuntimeShape &input_shape, const float *input_data,
                                       const RuntimeShape &output_shape, float *output_data)
{
  const int trailing_dim = input_shape.DimensionsCount() - 1;
  const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
  const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);

  for (int i = 0; i < outer_size; ++i)
  {
    for (int c = 0; c < depth; ++c)
    {
      // Window is [c - range, c + range) clipped to [0, depth). NOTE(review):
      // the upper bound is exclusive, so channel c + range itself is NOT
      // summed — confirm this matches the intended radius semantics of
      // `op_params.range`.
      const int begin_input_c = std::max(0, static_cast<int>(c - op_params.range));
      const int end_input_c = std::min(depth, static_cast<int>(c + op_params.range));
      // Sum of squares over the window.
      float accum = 0.f;
      for (int input_c = begin_input_c; input_c < end_input_c; ++input_c)
      {
        const float input_val = input_data[i * depth + input_c];
        accum += input_val * input_val;
      }
      const float multiplier = std::pow(op_params.bias + op_params.alpha * accum, -op_params.beta);
      output_data[i * depth + c] = input_data[i * depth + c] * multiplier;
    }
  }
}
+
+inline void Dequantize(const RuntimeShape &input_shape, const Eigen::half *input_data,
+ const RuntimeShape &output_shape, float *output_data)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; i++)
+ {
+ output_data[i] = static_cast<float>(input_data[i]);
+ }
+}
+
// Simulates num_bits-bit quantization of float data: nudges the [min, max]
// range so zero is exactly representable, then quantize-dequantizes every
// element in place via FakeQuantizeArray.
inline void FakeQuant(const tflite::FakeQuantParams &op_params, const RuntimeShape &input_shape,
                      const float *input_data, const RuntimeShape &output_shape, float *output_data)
{
  ruy::profiler::ScopeLabel label("FakeQuant");
  float rmin = op_params.minmax.min;
  float rmax = op_params.minmax.max;
  int num_bits = op_params.num_bits;
  // 0 should always be a representable value. Let's assume that the initial
  // min,max range contains 0.
  TFLITE_DCHECK_LE(rmin, 0.0f);
  TFLITE_DCHECK_GE(rmax, 0.0f);
  TFLITE_DCHECK_LT(rmin, rmax);

  // Code matches tensorflow's FakeQuantWithMinMaxArgsFunctor.
  // Quantized grid spans [0, 2^num_bits - 1].
  int quant_min = 0;
  int quant_max = (1 << num_bits) - 1;
  float nudged_min, nudged_max, nudged_scale;
  NudgeQuantizationRange(rmin, rmax, quant_min, quant_max, &nudged_min, &nudged_max, &nudged_scale);
  const int flat_size = MatchingFlatSize(input_shape, output_shape);
  FakeQuantizeArray(nudged_scale, nudged_min, nudged_max, input_data, output_data, flat_size);
}
+
// Common subroutine for both `GatherNd` and `GatherNdString`.
struct GatherNdHelperResult
{
  int n_slices;   // number of coordinates to gather (product of all but the
                  // last indices dimension)
  int slice_size; // elements copied per gathered coordinate
  int indices_nd; // length of one coordinate (innermost indices dimension)
  std::vector<int> dims_to_count; // row-major strides for the first
                                  // `indices_nd` params dimensions
};
+
+// Returns common values being used on both `GatherNd` and `GatherNdString`.
+inline GatherNdHelperResult GatherNdHelper(const RuntimeShape &params_shape,
+ const RuntimeShape &indices_shape)
+{
+ GatherNdHelperResult ret;
+ ret.n_slices = 1;
+ ret.slice_size = 1;
+ const int indices_dims = indices_shape.DimensionsCount();
+ ret.indices_nd = indices_shape.Dims(indices_dims - 1);
+ const int params_dims = params_shape.DimensionsCount();
+ for (int i = 0; i < indices_dims - 1; ++i)
+ {
+ ret.n_slices *= indices_shape.Dims(i);
+ }
+ for (int i = ret.indices_nd; i < params_dims; ++i)
+ {
+ ret.slice_size *= params_shape.Dims(i);
+ }
+
+ int remain_flat_size = params_shape.FlatSize();
+ ret.dims_to_count = std::vector<int>(ret.indices_nd, 0);
+ for (int i = 0; i < ret.indices_nd; ++i)
+ {
+ ret.dims_to_count[i] = remain_flat_size / params_shape.Dims(i);
+ remain_flat_size = ret.dims_to_count[i];
+ }
+
+ return ret;
+}
+
+template <typename ParamsT, typename IndicesT = int32>
+inline void GatherNd(const RuntimeShape &params_shape, const ParamsT *params_data,
+ const RuntimeShape &indices_shape, const IndicesT *indices_data,
+ const RuntimeShape &output_shape, ParamsT *output_data)
+{
+ ruy::profiler::ScopeLabel label("GatherNd");
+
+ const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
+ for (int i = 0; i < res.n_slices; ++i)
+ {
+ int from_pos = 0;
+ for (int j = 0; j < res.indices_nd; ++j)
+ {
+ from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j];
+ }
+ std::memcpy(output_data + i * res.slice_size, params_data + from_pos,
+ sizeof(ParamsT) * res.slice_size);
+ }
+}
+
#ifndef TF_LITE_STATIC_MEMORY
// String flavor of GatherNd. Strings are variable-length, so instead of a
// memcpy per slice, gathered entries are appended to a DynamicBuffer and
// serialized into the output tensor at the end.
template <typename IndicesT = int32>
inline void GatherNdString(const RuntimeShape &params_shape, const TfLiteTensor *params_data,
                           const RuntimeShape &indices_shape, const IndicesT *indices_data,
                           const RuntimeShape &output_shape, TfLiteTensor *output_data)
{
  ruy::profiler::ScopeLabel label("GatherNdString");

  const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
  DynamicBuffer buffer;
  for (int i = 0; i < res.n_slices; ++i)
  {
    // Flat element offset addressed by the i-th coordinate.
    int from_pos = 0;
    for (int j = 0; j < res.indices_nd; ++j)
    {
      from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j];
    }
    for (int j = 0; j < res.slice_size; ++j)
    {
      buffer.AddString(GetString(params_data, from_pos + j));
    }
  }
  buffer.WriteToTensor(output_data, /*new_shape=*/nullptr);
}
#endif
+
+template <typename IndicesT, typename UpdatesT>
+inline void ScatterNd(const RuntimeShape &indices_shape, const IndicesT *indices_data,
+ const RuntimeShape &updates_shape, const UpdatesT *updates_data,
+ const RuntimeShape &output_shape, UpdatesT *output_data)
+{
+ ruy::profiler::ScopeLabel label("ScatterNd");
+
+ int n_slices = 1;
+ int slice_size = 1;
+ const int outer_dims = indices_shape.DimensionsCount() - 1;
+ const int indices_nd = indices_shape.Dims(outer_dims);
+ const int updates_dims = updates_shape.DimensionsCount();
+ for (int i = 0; i < outer_dims; ++i)
+ {
+ n_slices *= indices_shape.Dims(i);
+ }
+ for (int i = outer_dims; i < updates_dims; ++i)
+ {
+ slice_size *= updates_shape.Dims(i);
+ }
+
+ int output_flat_size = output_shape.FlatSize();
+ int remain_flat_size = output_flat_size;
+ std::vector<int> dims_to_count(indices_nd, 0);
+ for (int i = 0; i < indices_nd; ++i)
+ {
+ dims_to_count[i] = remain_flat_size / output_shape.Dims(i);
+ remain_flat_size = dims_to_count[i];
+ }
+
+ memset(output_data, 0, sizeof(UpdatesT) * output_flat_size);
+ for (int i = 0; i < n_slices; ++i)
+ {
+ int to_pos = 0;
+ for (int j = 0; j < indices_nd; ++j)
+ {
+ IndicesT idx = indices_data[i * indices_nd + j];
+ TFLITE_DCHECK(0 <= idx && idx < output_shape.Dims(j));
+ to_pos += idx * dims_to_count[j];
+ }
+ for (int j = 0; j < slice_size; j++)
+ {
+ output_data[to_pos + j] += updates_data[i * slice_size + j];
+ }
+ }
+}
+
// Generic 5-D slice: visits every input coordinate inside the begin/size
// window in row-major order and forwards its flat offset to `writer`.
template <typename T>
inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
                  const RuntimeShape &output_shape, SequentialTensorWriter<T> *writer)
{
  const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(5, input_shape);
  TFLITE_DCHECK_LE(op_params.begin_count, 5);
  TFLITE_DCHECK_LE(op_params.size_count, 5);
  const int begin_count = op_params.begin_count;
  const int size_count = op_params.size_count;
  // We front-pad the begin and size vectors.
  std::array<int, 5> start;
  std::array<int, 5> stop;
  for (int i = 0; i < 5; ++i)
  {
    // Extended dimension i maps to user entry begin_count - (5 - i).
    // Dimensions with no entry (front padding) default to the full range,
    // and a size of -1 also means "to the end of that dimension".
    int padded_i = 5 - i;
    start[i] = begin_count < padded_i ? 0 : op_params.begin[begin_count - padded_i];
    stop[i] = (size_count < padded_i || op_params.size[size_count - padded_i] == -1)
                ? ext_shape.Dims(i)
                : start[i] + op_params.size[size_count - padded_i];
  }

  for (int i0 = start[0]; i0 < stop[0]; ++i0)
  {
    for (int i1 = start[1]; i1 < stop[1]; ++i1)
    {
      for (int i2 = start[2]; i2 < stop[2]; ++i2)
      {
        for (int i3 = start[3]; i3 < stop[3]; ++i3)
        {
          for (int i4 = start[4]; i4 < stop[4]; ++i4)
          {
            writer->Write(Offset(ext_shape, i0, i1, i2, i3, i4));
          }
        }
      }
    }
  }
}
+
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+ const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+ SequentialTensorWriter<T> writer(input_data, output_data);
+ return Slice(op_params, input_shape, output_shape, &writer);
+}
+
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+ const TfLiteTensor *input, const RuntimeShape &output_shape, TfLiteTensor *output)
+{
+ SequentialTensorWriter<T> writer(input, output);
+ return Slice(op_params, input_shape, output_shape, &writer);
+}
+
+template <typename T>
+void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int flat_size = MatchingFlatSize(input1_shape, output_shape);
+
+ auto min_value = input2_data[0];
+ for (int i = 0; i < flat_size; i++)
+ {
+ output_data[i] = input1_data[i] > min_value ? min_value : input1_data[i];
+ }
+}
+
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops.
+template <typename T>
+inline void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &,
+ const T *input2_data, const RuntimeShape &output_shape, T *output_data)
+{
+ // Drop shape of second input: not needed.
+ Minimum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+template <typename T>
+void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int flat_size = MatchingFlatSize(input1_shape, output_shape);
+
+ auto max_value = input2_data[0];
+ for (int i = 0; i < flat_size; i++)
+ {
+ output_data[i] = input1_data[i] < max_value ? max_value : input1_data[i];
+ }
+}
+
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops.
+template <typename T>
+inline void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &,
+ const T *input2_data, const RuntimeShape &output_shape, T *output_data)
+{
+ // Drop shape of second input: not needed.
+ Maximum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
// ArgMax in terms of the shared ArgMinMax implementation, selecting maxima
// via a greater-than comparator. `input2_data` is forwarded untouched to
// ArgMinMax (presumably the reduction axis — confirm at its definition).
template <typename T1, typename T2, typename T3>
void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data,
            const RuntimeShape &output_shape, T2 *output_data)
{
  ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data, std::greater<T1>());
}
+
// Convenience version that allows, for example, generated-code calls to be
// the same as other binary ops. `input2_shape` is accepted only for signature
// compatibility and is intentionally unused.
template <typename T1, typename T2, typename T3>
inline void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data,
                   const RuntimeShape &input2_shape, const T3 *input2_data,
                   const RuntimeShape &output_shape, T2 *output_data)
{
  // Drop shape of second input: not needed.
  ArgMax(input1_shape, input1_data, input2_data, output_shape, output_data);
}
+
+template <typename D, typename T>
+void Select(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+ const RuntimeShape &input_x_shape, const T *input_x_data,
+ const RuntimeShape &input_y_shape, const T *input_y_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ int64_t flatsize;
+ // Allow select operator executions on mixed scalar tensors and one element
+ // tensors.
+ if (input_condition_shape.FlatSize() == 1 && input_x_shape.FlatSize() == 1 &&
+ input_y_shape.FlatSize() == 1 && output_shape.FlatSize() == 1)
+ {
+ flatsize = 1;
+ }
+ else
+ {
+ flatsize = MatchingFlatSize(input_condition_shape, input_x_shape, input_y_shape, output_shape);
+ }
+ for (int64_t i = 0; i < flatsize; ++i)
+ {
+ output_data[i] = input_condition_data[i] ? input_x_data[i] : input_y_data[i];
+ }
+}
+
// Select where the condition is (at most) rank-1 and chooses whole rows:
// condition element i picks row i of either x or y, copied wholesale.
template <typename D, typename T>
void RankOneSelect(const RuntimeShape &input_condition_shape, const D *input_condition_data,
                   const RuntimeShape &input_x_shape, const T *input_x_data,
                   const RuntimeShape &input_y_shape, const T *input_y_data,
                   const RuntimeShape &output_shape, T *output_data)
{
  const int64_t outer_size = input_condition_shape.FlatSize();
  int64_t inner_size;
  if (input_condition_shape.DimensionsCount() == 0)
  {
    // Scalar condition: the single element selects the entire tensor.
    inner_size = MatchingFlatSize(input_x_shape, input_y_shape, output_shape);
  }
  else
  {
    // Rank-1 condition: its length must match the leading dimension of the
    // data tensors; each condition element selects one row.
    TFLITE_DCHECK_EQ(MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0), outer_size);
    inner_size = MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape);
  }

  // Copy each selected row as one contiguous memcpy.
  int64_t offset = 0;
  for (int64_t i = 0; i < outer_size; i++)
  {
    const T *input_data = input_condition_data[i] ? input_x_data : input_y_data;
    memcpy(output_data + offset, input_data + offset, inner_size * sizeof(T));
    offset += inner_size;
  }
}
+
// Select with NumPy-style broadcasting of condition, x and y, for tensors of
// rank <= 4. "Slow" because it walks every output coordinate individually.
template <typename D, typename T>
void BroadcastSelect4DSlow(const RuntimeShape &input_condition_shape, const D *input_condition_data,
                           const RuntimeShape &input_x_shape, const T *input_x_data,
                           const RuntimeShape &input_y_shape, const T *input_y_data,
                           const RuntimeShape &output_shape, T *output_data)
{
  TFLITE_DCHECK_LE(input_condition_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(input_x_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(input_y_shape.DimensionsCount(), 4);
  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4);

  const RuntimeShape extended_output_shape = RuntimeShape::ExtendedShape(4, output_shape);

  // Per-input descriptors that map an output coordinate to each (possibly
  // smaller, broadcast) input's index.
  NdArrayDesc<4> desc_condition;
  NdArrayDesc<4> desc_x;
  NdArrayDesc<4> desc_y;
  NdArrayDescsForElementwiseBroadcast(input_condition_shape, input_x_shape, input_y_shape,
                                      &desc_condition, &desc_x, &desc_y);

  // In Tensorflow, the dimensions are canonically named (batch_number, row,
  // col, channel), with extents (batches, height, width, depth), with the
  // trailing dimension changing most rapidly (channels has the smallest
  // stride, typically 1 element).
  //
  // In generated C code, we store arrays with the dimensions reversed. The
  // first dimension has smallest stride.
  //
  // We name our variables by their Tensorflow convention, but generate C code
  // nesting loops such that the innermost loop has the smallest stride for
  // the best cache behavior.
  for (int b = 0; b < extended_output_shape.Dims(0); ++b)
  {
    for (int y = 0; y < extended_output_shape.Dims(1); ++y)
    {
      for (int x = 0; x < extended_output_shape.Dims(2); ++x)
      {
        for (int c = 0; c < extended_output_shape.Dims(3); ++c)
        {
          const int condition_index = SubscriptToIndex(desc_condition, b, y, x, c);
          const int x_index = SubscriptToIndex(desc_x, b, y, x, c);
          const int y_index = SubscriptToIndex(desc_y, b, y, x, c);
          output_data[Offset(extended_output_shape, b, y, x, c)] =
            input_condition_data[condition_index] ? input_x_data[x_index] : input_y_data[y_index];
        }
      }
    }
  }
}
+
// Writes the multi-dimensional coordinates of every true element of the
// condition tensor, in row-major scan order. Output is laid out as
// [num_true, cond_rank]; the caller must size it for the number of true
// elements.
template <typename D, typename T>
void SelectTrueCoords(const RuntimeShape &input_condition_shape, const D *input_condition_data,
                      T *output_data)
{
  const size_t size = input_condition_shape.FlatSize();
  if (size == 0)
  {
    // Dimension is zero, in which case we don't need to output.
    return;
  }
  const size_t cond_rank = input_condition_shape.DimensionsCount();

  // Row-major strides of the condition shape, used to decompose a flat index
  // back into per-dimension coordinates.
  std::vector<int> dims_to_count(cond_rank, 0);
  int cur_flat_size = size;
  for (int i = 0; i < cond_rank; ++i)
  {
    dims_to_count[i] = cur_flat_size / input_condition_shape.Dims(i);
    cur_flat_size = dims_to_count[i];
  }

  int output_index = 0;
  for (int i = 0; i < size; ++i)
  {
    if (input_condition_data[i])
    {
      // Insert the coordinate of the current item (row major) into output.
      int flat_index = i;
      for (int j = 0; j < cond_rank; ++j)
      {
        int coord_j = flat_index / dims_to_count[j];
        output_data[output_index * cond_rank + j] = coord_j;
        flat_index %= dims_to_count[j];
      }
      output_index++;
    }
  }
}
+
+// For easy implementation, the indices is always a vector of size-4 vectors.
+// Scatters `values` into a dense output tensor: output is first filled with
+// default_value, then for each 4-D index in `indices` the corresponding value
+// is written. When value_is_scalar is true, the single value *values is
+// broadcast to every index. Later duplicate indices overwrite earlier ones.
+template <typename T, typename TI>
+inline void SparseToDense(const std::vector<std::vector<TI>> &indices, const T *values,
+ T default_value, bool value_is_scalar,
+ const RuntimeShape &unextended_output_shape, T *output_data)
+{
+ TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+ const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
+ const int value_count = indices.size();
+
+ // First fill the output_data with default value.
+ const int num_elements = output_shape.FlatSize();
+ for (int i = 0; i < num_elements; ++i)
+ {
+ output_data[i] = default_value;
+ }
+
+ // Special handle for value is scalar case to avoid checking the boolean
+ // condition within the loop every time.
+ if (value_is_scalar)
+ {
+ for (int i = 0; i < value_count; ++i)
+ {
+ const std::vector<TI> &index = indices[i];
+ TFLITE_DCHECK_EQ(index.size(), 4);
+ const T value = *values; // just use the first value.
+ output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value;
+ }
+ return;
+ }
+
+ // Go through the values and indices to fill the sparse values.
+ for (int i = 0; i < value_count; ++i)
+ {
+ const std::vector<TI> &index = indices[i];
+ TFLITE_DCHECK_EQ(index.size(), 4);
+ const T value = values[i];
+ output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value;
+ }
+}
+
+// Elementwise power: output[i] = input1[i] ** input2[i]. All three shapes
+// must have matching flat sizes (no broadcasting — see BroadcastPow4DSlow
+// below for the broadcasting variant).
+template <typename T>
+inline void Pow(const RuntimeShape &input1_shape, const T *input1_data,
+ const RuntimeShape &input2_shape, const T *input2_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ output_data[i] = std::pow(input1_data[i], input2_data[i]);
+ }
+}
+
+// Elementwise power with numpy-style broadcasting over up-to-4-D inputs.
+// "Slow" because it recomputes broadcast indices per element via
+// SubscriptToIndex instead of using a fused fast path.
+template <typename T>
+inline void BroadcastPow4DSlow(const RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const RuntimeShape &unextended_input2_shape, const T *input2_data,
+ const RuntimeShape &unextended_output_shape, T *output_data)
+{
+ TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+ const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+ // Descriptors map 4-D output subscripts back to (possibly broadcast)
+ // positions in each input.
+ NdArrayDesc<4> desc1;
+ NdArrayDesc<4> desc2;
+ NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, &desc1,
+ &desc2);
+
+ for (int b = 0; b < output_shape.Dims(0); ++b)
+ {
+ for (int y = 0; y < output_shape.Dims(1); ++y)
+ {
+ for (int x = 0; x < output_shape.Dims(2); ++x)
+ {
+ for (int c = 0; c < output_shape.Dims(3); ++c)
+ {
+ auto out_idx = Offset(output_shape, b, y, x, c);
+ auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
+ auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
+ auto in1_val = input1_data[in1_idx];
+ auto in2_val = input2_data[in2_idx];
+ output_data[out_idx] = std::pow(in1_val, in2_val);
+ }
+ }
+ }
+ }
+}
+
+// Reverses the input tensor along a single axis. The tensor is viewed as
+// (outer_size, dims_at_axis, copy_size); each contiguous copy_size slab at
+// axis position j is copied to position (dims_at_axis - j - 1).
+template <typename Scalar>
+void Reverse(int axis, const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape &output_shape, Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("Reverse");
+
+ // Product of dimensions before the axis.
+ int outer_size = 1;
+ for (int i = 0; i < axis; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+
+ // Product of dimensions after the axis; this is the size of each
+ // contiguous run that is moved as a unit.
+ int copy_size = 1;
+ for (int i = axis + 1; i < input_shape.DimensionsCount(); ++i)
+ {
+ copy_size *= input_shape.Dims(i);
+ }
+
+ const int dims_at_axis = input_shape.Dims(axis);
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int j = 0; j < dims_at_axis; ++j)
+ {
+ const int start_pos = (i * dims_at_axis + j) * copy_size;
+ Scalar *output_ptr = output_data + start_pos;
+ // Mirrored source position within the same outer slice.
+ int loc = (i * dims_at_axis + dims_at_axis - j - 1) * copy_size;
+ memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
+ }
+ }
+}
+
+// Reverses variable-length slices along seq_dim, where seq_lengths (indexed
+// by position along batch_dim) gives the prefix length to reverse for each
+// batch entry; elements beyond that length are copied through unchanged.
+// The two branches below handle the two possible orderings of batch_dim and
+// seq_dim; the tensor is viewed as
+// (outer_size, dims_at_outer_dim, medium_size, dims_at_medium_dim, copy_size).
+// NOTE(review): the batch_dim == seq_dim case falls through with no output
+// written — presumably rejected earlier by the kernel's Prepare; confirm.
+template <typename Scalar, typename TS>
+void ReverseSequence(const TS *seq_lengths, const int seq_dim, const int batch_dim,
+ const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape &output_shape, Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("ReverseSequence");
+
+ // Product of dimensions before min(batch_dim, seq_dim).
+ int outer_size = 1;
+ int outer_dim = std::min(batch_dim, seq_dim);
+ int medium_dim = std::max(batch_dim, seq_dim);
+ for (int i = 0; i < outer_dim; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+
+ // Product of dimensions strictly between the two dims of interest.
+ int medium_size = 1;
+ for (int i = outer_dim + 1; i < medium_dim; ++i)
+ {
+ medium_size *= input_shape.Dims(i);
+ }
+
+ // Product of dimensions after max(batch_dim, seq_dim): contiguous run size.
+ int copy_size = 1;
+ for (int i = medium_dim + 1; i < input_shape.DimensionsCount(); ++i)
+ {
+ copy_size *= input_shape.Dims(i);
+ }
+
+ const int dims_at_outer_dim = input_shape.Dims(outer_dim);
+ const int dims_at_medium_dim = input_shape.Dims(medium_dim);
+
+ Scalar *output_ptr;
+ if (batch_dim > seq_dim)
+ {
+ // seq_dim is the outer of the two: j iterates sequence positions and
+ // q iterates batch entries, so seq_lengths is indexed by q.
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int j = 0; j < dims_at_outer_dim; ++j)
+ {
+ const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ for (int p = 0; p < medium_size; ++p)
+ {
+ for (int q = 0; q < dims_at_medium_dim; ++q)
+ {
+ const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ const Scalar *in_ptr = input_data + in_pos;
+ int sl = seq_lengths[q] - 1;
+ if (j > sl)
+ {
+ // Past this batch entry's sequence length: copy through.
+ output_ptr = output_data + in_pos;
+ }
+ else
+ {
+ // Within the reversed prefix: mirror j to (sl - j).
+ const int out_pos_base = (i * dims_at_outer_dim + sl - j) * medium_size;
+ const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ output_ptr = output_data + out_pos;
+ }
+ memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
+ }
+ }
+ }
+ }
+ }
+ else if (batch_dim < seq_dim)
+ {
+ // batch_dim is the outer of the two: j iterates batch entries (so
+ // seq_lengths is indexed by j) and q iterates sequence positions.
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int j = 0; j < dims_at_outer_dim; ++j)
+ {
+ const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ int sl = seq_lengths[j] - 1;
+ const int out_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ for (int p = 0; p < medium_size; ++p)
+ {
+ for (int q = 0; q < dims_at_medium_dim; ++q)
+ {
+ const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ const Scalar *in_ptr = input_data + in_pos;
+ if (q > sl)
+ {
+ // Past the sequence length: copy through unchanged.
+ output_ptr = output_data + in_pos;
+ }
+ else
+ {
+ // Within the reversed prefix: mirror q to (sl - q).
+ const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + sl - q) * copy_size;
+ output_ptr = output_data + out_pos;
+ }
+ memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
+ }
+ }
+ }
+ }
+ }
+}
+
+// Sums rows of the input (along dimension 0) into output rows selected by
+// segment_ids_data: output[segment_ids[i]] += input[i]. Output is zeroed
+// first, so segments with no members produce all-zero rows.
+// NOTE(review): segment ids are used unchecked — values outside
+// [0, output_shape.Dims(0)) would index out of bounds; presumably validated
+// by the calling kernel. Confirm.
+template <typename T>
+inline void SegmentSum(const RuntimeShape &input_shape, const T *input_data,
+ const RuntimeShape &segment_ids_shape, const int32_t *segment_ids_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ // Elements per row (all dimensions except dim 0).
+ const int segment_flat_size = MatchingFlatSizeSkipDim(input_shape, 0, output_shape);
+
+ memset(output_data, 0, sizeof(T) * output_shape.FlatSize());
+
+ for (int i = 0; i < input_shape.Dims(0); i++)
+ {
+ int output_index = segment_ids_data[i];
+ for (int j = 0; j < segment_flat_size; ++j)
+ {
+ output_data[output_index * segment_flat_size + j] += input_data[i * segment_flat_size + j];
+ }
+ }
+}
+
+} // namespace reference_ops
+} // namespace tflite
+
+#endif // LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
diff --git a/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst b/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst
index 428b15ee0..1e6c41ecc 100644
--- a/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst
+++ b/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst
@@ -13,6 +13,7 @@ REGISTER_KERNEL(Div)
REGISTER_KERNEL(Elu)
REGISTER_KERNEL(Exp)
REGISTER_KERNEL(ExpandDims)
+REGISTER_KERNEL(Fill)
REGISTER_KERNEL(Floor)
REGISTER_KERNEL(FloorDiv)
REGISTER_KERNEL(Equal)
@@ -48,6 +49,7 @@ REGISTER_KERNEL(PadV2)
REGISTER_KERNEL(Pow)
REGISTER_KERNEL(PRelu)
REGISTER_KERNEL(Quantize)
+REGISTER_KERNEL(ReduceMax)
REGISTER_KERNEL(Relu)
REGISTER_KERNEL(Relu6)
REGISTER_KERNEL(Reshape)
@@ -55,6 +57,7 @@ REGISTER_KERNEL(ResizeBilinear)
REGISTER_KERNEL(ResizeNearestNeighbor)
REGISTER_KERNEL(ReverseV2)
REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Shape)
REGISTER_KERNEL(Slice)
REGISTER_KERNEL(Softmax)
REGISTER_KERNEL(SpaceToBatchND)
diff --git a/compiler/luci-interpreter/pal/linux/PALreference_ops.h b/compiler/luci-interpreter/pal/linux/PALreference_ops.h
new file mode 100644
index 000000000..825ebfe8e
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALreference_ops.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+#define LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+#endif // LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
diff --git a/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst b/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst
index d134a6b95..f0df58db3 100644
--- a/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst
+++ b/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst
@@ -12,6 +12,7 @@ REGISTER_KERNEL(Div)
REGISTER_KERNEL(Elu)
REGISTER_KERNEL(Exp)
REGISTER_KERNEL(ExpandDims)
+REGISTER_KERNEL(Fill)
REGISTER_KERNEL(Floor)
REGISTER_KERNEL(FloorDiv)
REGISTER_KERNEL(Equal)
@@ -44,6 +45,7 @@ REGISTER_KERNEL(Reshape)
REGISTER_KERNEL(ResizeBilinear)
REGISTER_KERNEL(ResizeNearestNeighbor)
REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Shape)
REGISTER_KERNEL(Softmax)
REGISTER_KERNEL(SpaceToBatchND)
REGISTER_KERNEL(SpaceToDepth)
diff --git a/compiler/luci-interpreter/pal/mcu/PALDequantize.h b/compiler/luci-interpreter/pal/mcu/PALDequantize.h
index 15ff0327b..efa6b167e 100644
--- a/compiler/luci-interpreter/pal/mcu/PALDequantize.h
+++ b/compiler/luci-interpreter/pal/mcu/PALDequantize.h
@@ -18,7 +18,7 @@
#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
-#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+#include "PALreference_ops.h"
namespace luci_interpreter_pal
{
diff --git a/compiler/luci-interpreter/pal/mcu/PALQuantize.h b/compiler/luci-interpreter/pal/mcu/PALQuantize.h
index 6046789ae..effb85d54 100644
--- a/compiler/luci-interpreter/pal/mcu/PALQuantize.h
+++ b/compiler/luci-interpreter/pal/mcu/PALQuantize.h
@@ -17,7 +17,7 @@
#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
#define LUCI_INTERPRETER_PAL_QUANTIZE_H
-#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+#include "PALreference_ops.h"
namespace luci_interpreter_pal
{
diff --git a/compiler/luci-interpreter/pal/mcu/PALreference_ops.h b/compiler/luci-interpreter/pal/mcu/PALreference_ops.h
new file mode 100644
index 000000000..62c720937
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALreference_ops.h
@@ -0,0 +1,1556 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+#define LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <type_traits>
+
+#include "third_party/eigen3/Eigen/Core"
+#include "fixedpoint/fixedpoint.h"
+#include "ruy/profiler/instrumentation.h" // from @ruy
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/add.h"
+#include "tensorflow/lite/kernels/internal/reference/add_n.h"
+#include "tensorflow/lite/kernels/internal/reference/arg_min_max.h"
+#include "tensorflow/lite/kernels/internal/reference/batch_matmul.h"
+#include "tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h"
+#include "tensorflow/lite/kernels/internal/reference/binary_function.h"
+#include "tensorflow/lite/kernels/internal/reference/cast.h"
+#include "tensorflow/lite/kernels/internal/reference/ceil.h"
+#include "tensorflow/lite/kernels/internal/reference/comparisons.h"
+#include "tensorflow/lite/kernels/internal/reference/concatenation.h"
+#include "tensorflow/lite/kernels/internal/reference/conv.h"
+#include "tensorflow/lite/kernels/internal/reference/depth_to_space.h"
+#include "tensorflow/lite/kernels/internal/reference/dequantize.h"
+#include "tensorflow/lite/kernels/internal/reference/div.h"
+#include "tensorflow/lite/kernels/internal/reference/elu.h"
+#include "tensorflow/lite/kernels/internal/reference/exp.h"
+#include "tensorflow/lite/kernels/internal/reference/fill.h"
+#include "tensorflow/lite/kernels/internal/reference/floor.h"
+#include "tensorflow/lite/kernels/internal/reference/floor_div.h"
+#include "tensorflow/lite/kernels/internal/reference/floor_mod.h"
+#include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
+#include "tensorflow/lite/kernels/internal/reference/gather.h"
+#include "tensorflow/lite/kernels/internal/reference/hard_swish.h"
+#include "tensorflow/lite/kernels/internal/reference/l2normalization.h"
+#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h"
+#include "tensorflow/lite/kernels/internal/reference/log_softmax.h"
+#include "tensorflow/lite/kernels/internal/reference/logistic.h"
+#include "tensorflow/lite/kernels/internal/reference/maximum_minimum.h"
+#include "tensorflow/lite/kernels/internal/reference/mul.h"
+#include "tensorflow/lite/kernels/internal/reference/neg.h"
+#include "tensorflow/lite/kernels/internal/reference/pad.h"
+#include "tensorflow/lite/kernels/internal/reference/pooling.h"
+#include "tensorflow/lite/kernels/internal/reference/prelu.h"
+#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "tensorflow/lite/kernels/internal/reference/quantize.h"
+#include "tensorflow/lite/kernels/internal/reference/reduce.h"
+#include "tensorflow/lite/kernels/internal/reference/requantize.h"
+#include "tensorflow/lite/kernels/internal/reference/resize_bilinear.h"
+#include "tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h"
+#include "tensorflow/lite/kernels/internal/reference/round.h"
+#include "tensorflow/lite/kernels/internal/reference/softmax.h"
+#include "tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h"
+#include "tensorflow/lite/kernels/internal/reference/space_to_depth.h"
+#include "tensorflow/lite/kernels/internal/reference/strided_slice.h"
+#include "tensorflow/lite/kernels/internal/reference/string_comparisons.h"
+#include "tensorflow/lite/kernels/internal/reference/sub.h"
+#include "tensorflow/lite/kernels/internal/reference/tanh.h"
+#include "tensorflow/lite/kernels/internal/reference/transpose.h"
+#include "tensorflow/lite/kernels/internal/reference/transpose_conv.h"
+#include "tensorflow/lite/kernels/internal/strided_slice_logic.h"
+#include "tensorflow/lite/kernels/internal/tensor.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+namespace tflite
+{
+
+namespace reference_ops
+{
+
+// Elementwise ReLU: output[i] = max(input[i], 0).
+template <typename T>
+inline void Relu(const RuntimeShape &input_shape, const T *input_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const T val = input_data[i];
+ const T lower = 0;
+ const T clamped = val < lower ? lower : val;
+ output_data[i] = clamped;
+ }
+}
+
+// Elementwise ReLU1: clamps each element to the range [-1, 1].
+template <typename T>
+inline void Relu1(const RuntimeShape &input_shape, const T *input_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ ruy::profiler::ScopeLabel label("Relu1 (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const T val = input_data[i];
+ const T upper = 1;
+ const T lower = -1;
+ const T clamped = val > upper ? upper : val < lower ? lower : val;
+ output_data[i] = clamped;
+ }
+}
+
+// Elementwise ReLU6 (float only): clamps each element to the range [0, 6].
+inline void Relu6(const RuntimeShape &input_shape, const float *input_data,
+ const RuntimeShape &output_shape, float *output_data)
+{
+ ruy::profiler::ScopeLabel label("Relu6 (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const float val = input_data[i];
+ const float upper = 6;
+ const float lower = 0;
+ const float clamped = val > upper ? upper : val < lower ? lower : val;
+ output_data[i] = clamped;
+ }
+}
+
+// Quantized ReLU-family activation: requantizes each element from the input
+// scale/offset to the output scale/offset, then clamps to the quantized
+// activation range in params.
+template <typename T>
+inline void ReluX(const tflite::ReluParams &params, const RuntimeShape &input_shape,
+ const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+ ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const int32 val = static_cast<int32_t>(input_data[i]);
+ // Rescale into the output quantization domain before clamping.
+ int32 clamped = params.output_offset + MultiplyByQuantizedMultiplier(val - params.input_offset,
+ params.output_multiplier,
+ params.output_shift);
+ clamped = std::max(params.quantized_activation_min, clamped);
+ clamped = std::min(params.quantized_activation_max, clamped);
+ output_data[i] = static_cast<T>(clamped);
+ }
+}
+
+// Quantized ReLU-family activation, same-scale variant: input and output
+// share a quantization, so this only clamps each element to
+// [quantized_activation_min, quantized_activation_max] — no requantization.
+template <typename T>
+inline void ReluX(const tflite::ActivationParams &params, const RuntimeShape &input_shape,
+ const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+ ruy::profiler::ScopeLabel label("Quantized ReluX (not fused)");
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
+ const T max_value = params.quantized_activation_max;
+ const T min_value = params.quantized_activation_min;
+ for (int i = 0; i < flat_size; ++i)
+ {
+ const T val = input_data[i];
+ const T clamped = val > max_value ? max_value : val < min_value ? min_value : val;
+ output_data[i] = clamped;
+ }
+}
+
+// TODO(jiawen): We can implement BroadcastMul on buffers of arbitrary
+// dimensionality if the runtime code does a single loop over one dimension
+// that handles broadcasting as the base case. The code generator would then
+// generate max(D1, D2) nested for loops.
+// Quantized uint8 broadcast multiply using the precomputed "fivefold" shape
+// decomposition in params.broadcast_shape. If the second input is the one
+// that broadcasts fast, the operands (and their offsets) are switched first
+// so the loop structure below only handles one orientation.
+inline void BroadcastMulFivefold(const ArithmeticParams &unswitched_params,
+ const RuntimeShape &unswitched_input1_shape,
+ const uint8 *unswitched_input1_data,
+ const RuntimeShape &unswitched_input2_shape,
+ const uint8 *unswitched_input2_data,
+ const RuntimeShape &output_shape, uint8 *output_data)
+{
+ ArithmeticParams switched_params = unswitched_params;
+ switched_params.input1_offset = unswitched_params.input2_offset;
+ switched_params.input2_offset = unswitched_params.input1_offset;
+
+ const bool use_unswitched = unswitched_params.broadcast_category ==
+ tflite::BroadcastableOpCategory::kFirstInputBroadcastsFast;
+
+ const ArithmeticParams &params = use_unswitched ? unswitched_params : switched_params;
+ const uint8 *input1_data = use_unswitched ? unswitched_input1_data : unswitched_input2_data;
+ const uint8 *input2_data = use_unswitched ? unswitched_input2_data : unswitched_input1_data;
+
+ // Fivefold nested loops. The second input resets its position for each
+ // iteration of the second loop. The first input resets its position at the
+ // beginning of the fourth loop. The innermost loop is an elementwise Mul of
+ // sections of the arrays.
+ uint8 *output_data_ptr = output_data;
+ const uint8 *input1_data_ptr = input1_data;
+ const uint8 *input2_data_reset = input2_data;
+ int y0 = params.broadcast_shape[0];
+ int y1 = params.broadcast_shape[1];
+ int y2 = params.broadcast_shape[2];
+ int y3 = params.broadcast_shape[3];
+ int y4 = params.broadcast_shape[4];
+ for (int i0 = 0; i0 < y0; ++i0)
+ {
+ const uint8 *input2_data_ptr;
+ for (int i1 = 0; i1 < y1; ++i1)
+ {
+ input2_data_ptr = input2_data_reset;
+ for (int i2 = 0; i2 < y2; ++i2)
+ {
+ for (int i3 = 0; i3 < y3; ++i3)
+ {
+ // Elementwise multiply of two y4-length runs.
+ MulElementwise(y4, params, input1_data_ptr, input2_data_ptr, output_data_ptr);
+ input2_data_ptr += y4;
+ output_data_ptr += y4;
+ }
+ input1_data_ptr += y4;
+ }
+ }
+ input2_data_reset = input2_data_ptr;
+ }
+}
+
+// Elementwise multiply of two Q0.15 fixed-point (int16) tensors; the product
+// is kept in Q0.15 with gemmlowp saturating arithmetic.
+// NOTE(review): params is unused here — no activation clamping is applied in
+// this int16*int16->int16 variant.
+inline void Mul(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+ const int16 *input1_data, const RuntimeShape &input2_shape,
+ const int16 *input2_data, const RuntimeShape &output_shape, int16 *output_data)
+{
+ ruy::profiler::ScopeLabel label("Mul/Int16");
+
+ const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+ for (int i = 0; i < flat_size; i++)
+ {
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+
+ F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
+ output_data[i] = unclamped_result.raw();
+ }
+}
+
+// Elementwise multiply of two Q0.15 fixed-point (int16) tensors producing a
+// uint8 output: the Q0.15 product is rescaled (rounding right-shift by 8),
+// clamped to the quantized activation range, and offset into uint8 space.
+inline void Mul(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+ const int16 *input1_data, const RuntimeShape &input2_shape,
+ const int16 *input2_data, const RuntimeShape &output_shape, uint8 *output_data)
+{
+ ruy::profiler::ScopeLabel label("Mul/Int16Uint8");
+ int32 output_offset = params.output_offset;
+ int32 output_activation_min = params.quantized_activation_min;
+ int32 output_activation_max = params.quantized_activation_max;
+ TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+
+ const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
+ for (int i = 0; i < flat_size; i++)
+ {
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+
+ F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]);
+ // Rounding divide by 2^8 maps the Q0.15 product toward uint8 range.
+ int16 rescaled_result = gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8);
+ // Clamp in offset-free space, then re-apply the output zero point.
+ int16 clamped_result = std::min<int16>(output_activation_max - output_offset, rescaled_result);
+ clamped_result = std::max<int16>(output_activation_min - output_offset, clamped_result);
+ output_data[i] = output_offset + clamped_result;
+ }
+}
+
+// Q0.15 fixed-point (int16) subtraction where exactly one operand may carry a
+// (negative) left shift: the shifted operand is first scaled down by
+// input_right_shift, then subtracted with saturation and clamped to the
+// quantized activation range. The two branches preserve operand order:
+// the result is always input1 - input2.
+inline void Sub16(const ArithmeticParams &params, const RuntimeShape &input1_shape,
+ const int16_t *input1_data, const RuntimeShape &input2_shape,
+ const int16_t *input2_data, const RuntimeShape &output_shape,
+ int16_t *output_data)
+{
+ ruy::profiler::ScopeLabel label("Sub/Int16");
+ const int input1_shift = params.input1_shift;
+ const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape);
+ const int16 output_activation_min = params.quantized_activation_min;
+ const int16 output_activation_max = params.quantized_activation_max;
+
+ // Exactly one of the two shifts may be non-zero, and shifts are <= 0
+ // (i.e. right shifts only).
+ TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0);
+ TFLITE_DCHECK_LE(input1_shift, 0);
+ TFLITE_DCHECK_LE(params.input2_shift, 0);
+ const int16 *not_shift_input = input1_shift == 0 ? input1_data : input2_data;
+ const int16 *shift_input = input1_shift == 0 ? input2_data : input1_data;
+ const int input_right_shift = input1_shift == 0 ? -params.input2_shift : -input1_shift;
+
+ if (input1_shift == 0)
+ {
+ // input2 is the shifted operand: result = input1 - scaled(input2).
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+ for (int i = 0; i < flat_size; ++i)
+ {
+ F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
+ F0 scaled_input =
+ F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
+ F0 result = SaturatingSub(input_ready_scaled, scaled_input);
+ const int16 raw_output = result.raw();
+ const int16 clamped_output =
+ std::min(output_activation_max, std::max(output_activation_min, raw_output));
+ output_data[i] = clamped_output;
+ }
+ }
+ else
+ {
+ // input1 is the shifted operand: result = scaled(input1) - input2.
+ // F0 uses 0 integer bits, range [-1, 1].
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+ for (int i = 0; i < flat_size; ++i)
+ {
+ F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]);
+ F0 scaled_input =
+ F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift));
+ F0 result = SaturatingSub(scaled_input, input_ready_scaled);
+ const int16 raw_output = result.raw();
+ const int16 clamped_output =
+ std::min(output_activation_max, std::max(output_activation_min, raw_output));
+ output_data[i] = clamped_output;
+ }
+ }
+}
+
+// Stacks inputs_count same-shaped input tensors along params.axis of the
+// output. Each input is viewed as (outer_size, copy_size); slab k of input i
+// lands at output slot (k * inputs_count + i).
+template <typename Scalar>
+void Pack(const PackParams &params, const RuntimeShape *const *input_shapes,
+ const Scalar *const *input_data, const RuntimeShape &output_shape, Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("Pack");
+ const int dimensions = output_shape.DimensionsCount();
+ int axis = params.axis;
+ int inputs_count = params.inputs_count;
+
+ // Product of output dimensions before the axis.
+ int outer_size = 1;
+ for (int i = 0; i < axis; i++)
+ {
+ outer_size *= output_shape.Dims(i);
+ }
+ // Product of output dimensions after the axis: contiguous run size.
+ int copy_size = 1;
+ for (int i = params.axis + 1; i < dimensions; i++)
+ {
+ copy_size *= output_shape.Dims(i);
+ }
+ // All inputs share a shape; checking the first is sufficient.
+ TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
+ for (int i = 0; i < inputs_count; ++i)
+ {
+ for (int k = 0; k < outer_size; k++)
+ {
+ const Scalar *input_ptr = input_data[i] + copy_size * k;
+ int loc = k * inputs_count * copy_size + i * copy_size;
+ memcpy(output_data + loc, input_ptr, copy_size * sizeof(Scalar));
+ }
+ }
+}
+
+// Inverse of Pack: splits the input along params.axis into num_split
+// same-shaped output tensors. Slot (k * outputs_count + i) of the input is
+// copied to slab k of output i. Accepts a negative axis (Python-style).
+template <typename Scalar>
+void Unpack(const UnpackParams &params, const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape &output_shape, Scalar *const *output_datas)
+{
+ ruy::profiler::ScopeLabel label("Unpack");
+ const int dimensions = input_shape.DimensionsCount();
+ const int outputs_count = params.num_split;
+
+ int outer_size = 1;
+ int axis = params.axis;
+ // Normalize a negative axis to its positive equivalent.
+ if (axis < 0)
+ {
+ axis += dimensions;
+ }
+ TFLITE_DCHECK_GE(axis, 0);
+ TFLITE_DCHECK_LT(axis, dimensions);
+ for (int i = 0; i < axis; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+ // Product of input dimensions after the axis: contiguous run size.
+ int copy_size = 1;
+ for (int i = axis + 1; i < dimensions; ++i)
+ {
+ copy_size *= input_shape.Dims(i);
+ }
+ // All outputs share a shape; output_shape describes each of them.
+ TFLITE_DCHECK_EQ(output_shape.FlatSize(), copy_size * outer_size);
+
+ for (int i = 0; i < outputs_count; ++i)
+ {
+ for (int k = 0; k < outer_size; k++)
+ {
+ Scalar *output_ptr = output_datas[i] + copy_size * k;
+ int loc = k * outputs_count * copy_size + i * copy_size;
+ memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
+ }
+ }
+}
+
+// Quantized (uint8) Pack with per-input requantization: inputs whose
+// zero point and scale already match the output are memcpy'd; otherwise each
+// element is rescaled into the output quantization (that slow path currently
+// asserts, so callers are expected to pre-match quantizations).
+// NOTE(review): the template parameter Scalar is only used for the local
+// output_ptr, while all data pointers are uint8 — this only compiles when
+// instantiated with Scalar = uint8. Mirrors upstream TFLite; confirm before
+// changing.
+template <typename Scalar>
+void PackWithScaling(const PackParams &params, const RuntimeShape *const *input_shapes,
+ const uint8 *const *input_data, const RuntimeShape &output_shape,
+ uint8 *output_data)
+{
+ ruy::profiler::ScopeLabel label("PackWithScaling");
+ const int dimensions = output_shape.DimensionsCount();
+ int axis = params.axis;
+ const int32 *input_zeropoint = params.input_zeropoint;
+ const float *input_scale = params.input_scale;
+ int inputs_count = params.inputs_count;
+ const int32 output_zeropoint = params.output_zeropoint;
+ const float output_scale = params.output_scale;
+
+ // Product of output dimensions before the axis.
+ int outer_size = 1;
+ for (int i = 0; i < axis; i++)
+ {
+ outer_size *= output_shape.Dims(i);
+ }
+ // Product of output dimensions after the axis: contiguous run size.
+ int copy_size = 1;
+ for (int i = axis + 1; i < dimensions; i++)
+ {
+ copy_size *= output_shape.Dims(i);
+ }
+ TFLITE_DCHECK_EQ((**input_shapes).FlatSize(), copy_size * outer_size);
+
+ Scalar *output_ptr = output_data;
+ const float inverse_output_scale = 1.f / output_scale;
+ for (int k = 0; k < outer_size; k++)
+ {
+ for (int i = 0; i < inputs_count; ++i)
+ {
+ if (input_zeropoint[i] == output_zeropoint && input_scale[i] == output_scale)
+ {
+ // Quantizations match: straight copy.
+ memcpy(output_ptr, input_data[i] + k * copy_size, copy_size * sizeof(Scalar));
+ }
+ else
+ {
+ // Requantization path — currently unreachable by design (assert).
+ assert(false);
+ const float scale = input_scale[i] * inverse_output_scale;
+ const float bias = -input_zeropoint[i] * scale;
+ auto input_ptr = input_data[i];
+ for (int j = 0; j < copy_size; ++j)
+ {
+ const int value =
+ static_cast<int32_t>(std::round(input_ptr[j] * scale + bias)) + output_zeropoint;
+ output_ptr[j] = static_cast<uint8_t>(std::max(std::min(255, value), 0));
+ }
+ }
+ output_ptr += copy_size;
+ }
+ }
+}
+
+// Concatenates inputs along the depth (last, NHWC channel) dimension by
+// delegating to Concatenation with axis forced to 3; all other params are
+// passed through unchanged.
+template <typename Scalar>
+void DepthConcatenation(const ConcatenationParams &params, const RuntimeShape *const *input_shapes,
+ const Scalar *const *input_data, const RuntimeShape &output_shape,
+ Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("DepthConcatenation");
+ auto params_copy = params;
+ params_copy.axis = 3;
+ Concatenation(params_copy, input_shapes, input_data, output_shape, output_data);
+}
+
+inline void LstmCell(const LstmCellParams &params, const RuntimeShape &unextended_input_shape,
+ const float *input_data, const RuntimeShape &unextended_prev_activ_shape,
+ const float *prev_activ_data, const RuntimeShape &weights_shape,
+ const float *weights_data, const RuntimeShape &unextended_bias_shape,
+ const float *bias_data, const RuntimeShape &unextended_prev_state_shape,
+ const float *prev_state_data,
+ const RuntimeShape &unextended_output_state_shape, float *output_state_data,
+ const RuntimeShape &unextended_output_activ_shape, float *output_activ_data,
+ const RuntimeShape &unextended_concat_temp_shape, float *concat_temp_data,
+ const RuntimeShape &unextended_activ_temp_shape, float *activ_temp_data)
+{
+ TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+ const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
+ const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+ const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+ const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+ const RuntimeShape output_state_shape =
+ RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+ const RuntimeShape output_activ_shape =
+ RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+ const RuntimeShape concat_temp_shape =
+ RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+ const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+ TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+
+ const int weights_dim_count = weights_shape.DimensionsCount();
+ const int batches = MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0,
+ output_state_shape, 0, output_activ_shape, 0);
+ const int height = MatchingDim(input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1,
+ output_state_shape, 1, output_activ_shape, 1);
+ const int width = MatchingDim(input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2,
+ output_state_shape, 2, output_activ_shape, 2);
+ const int input_depth = input_shape.Dims(3);
+ const int prev_activ_depth = prev_activ_shape.Dims(3);
+ const int total_input_depth = prev_activ_depth + input_depth;
+ TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
+ TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
+ const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+ TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
+ TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
+ const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+ 3, output_activ_shape, 3);
+ TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
+
+ // Concatenate prev_activ and input data together
+ std::vector<float const *> concat_input_arrays_data;
+ std::vector<RuntimeShape const *> concat_input_arrays_shapes;
+ concat_input_arrays_data.push_back(input_data);
+ concat_input_arrays_data.push_back(prev_activ_data);
+ concat_input_arrays_shapes.push_back(&input_shape);
+ concat_input_arrays_shapes.push_back(&prev_activ_shape);
+ tflite::ConcatenationParams concat_params;
+ concat_params.axis = 3;
+ concat_params.inputs_count = concat_input_arrays_data.size();
+ Concatenation(concat_params, &(concat_input_arrays_shapes[0]), &(concat_input_arrays_data[0]),
+ concat_temp_shape, concat_temp_data);
+
+ // Fully connected
+ tflite::FullyConnectedParams fc_params;
+ fc_params.float_activation_min = std::numeric_limits<float>::lowest();
+ fc_params.float_activation_max = std::numeric_limits<float>::max();
+ FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape, weights_data,
+ bias_shape, bias_data, activ_temp_shape, activ_temp_data);
+
+ // Memory state update (the LSTM "guts")
+ for (int b = 0; b < batches; ++b)
+ {
+ for (int w = 0; w < width; ++w)
+ {
+ for (int h = 0; h < height; ++h)
+ {
+ for (int c = 0; c < output_depth; ++c)
+ {
+ const float input_gate =
+ 1.f /
+ (1.f +
+ std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 0 * output_depth + c)]));
+ const float new_input =
+ std::tanh(activ_temp_data[Offset(activ_temp_shape, b, h, w, 1 * output_depth + c)]);
+ const float forget_gate =
+ 1.f /
+ (1.f +
+ std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 2 * output_depth + c)]));
+ const float output_gate =
+ 1.f /
+ (1.f +
+ std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w, 3 * output_depth + c)]));
+ const float new_state =
+ input_gate * new_input +
+ forget_gate * prev_state_data[Offset(prev_state_shape, b, h, w, c)];
+ output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state;
+ output_activ_data[Offset(output_activ_shape, b, h, w, c)] =
+ output_gate * std::tanh(new_state);
+ }
+ }
+ }
+ }
+}
+
+// Quantized LSTM cell implementation.
+// The quantization of the input, output arrays is as follows:
+// - The input activations are quantized as uint8 on the interval
+// [-1, 127/128].
+// The rationale for that is that it is the natural interval
+// activations (see next point) and these need to be concatenated together.
+// We could accommodate different ranges by re-scaling, but we empirically
+// found that setting the input activations range to be [-1, 127/128] in the
+// first place, removing the need for re-scaling, greatly improves accuracy.
+// - The output activations are quantized as uint8 on the interval
+// [-1, 127/128].
+// The rationale for that is that the definition of a LSTM cell makes them
+// intrinsically constrained in [-1, 1]; tweaking that to [-1, 127/128]
+// makes for simpler, more accurate fixed-point arithmetic.
+// - The output-at-previous-timestep state array is obviously quantized as
+// the output activations.
+// - The internal LSTM memory (not the output-at-previous-timestep, the other
+// internal state array) is int16-quantized and may use any power-of-two,
+// symmetric range i.e. [-2^N, 2^N * 32767/32768] for any N, which we call
+// StateIntegerBits below, see the below discussion of that template
+// parameter ("The StateIntegerBits template parameter").
+// - The output of the internal fully-connected node is int16-quantized
+// on the interval [-8, 8 * 32767/32768], the rationale for which is
+// explained just below ("Why [-8, 8] for fully-connected output?").
+//
+//
+// === The StateIntegerBits template parameter ===
+//
+// The StateIntegerBits template parameter controls the fixed-point format used
+// to represent the internal memory of the LSTM cell (not the
+// output-at-previous-timestep, the other internal state array). It's currently
+// a template parameter so that the model can control that. The most typical
+// value for StateIntegerBits is 4. Other plausible values are anywhere between
+// 3 and 5. We might eventually standardize on a single supported value, e.g. 4,
+// and drop that template parameter. The reason why it can't be a runtime
+// parameter is that this controls the fixed-point format used, i.e. we need to
+// generate actually different code based on it. In particular, we generate code
+// for a fixed-point tanh() implementation for that format, which internally
+// uses a fixed-point exp() implementation, which internally uses a
+// barrel-shifter with a number of steps that depends on StateIntegerBits.
+// Another consequence of that is that a higher value of StateIntegerBits
+// results in a more expensive implementation (more barrel shifter steps
+// needed).
+//
+//
+// === Why [-8, 8] for fully-connected output? ===
+//
+// This array is only fed to Logistic and Tanh functions, for which
+// the quantized implementation will want to use fixed-point arithmetic,
+// requiring a power-of-two representation interval. Thus, we should right
+// away quantize this array to a power-of-two interval; otherwise,
+// implementation will need to rescale that, losing any benefit that a tighter
+// representation interval might otherwise yield, while introducing some
+// numerical error and computational overhead.
+//
+// Now, Logistic and Tanh
+// are nearly constant (nearly equal to their horizontal asymptotes)
+// outside of a small bounded interval around 0:
+//
+// Logistic(4) = 1 - 1.8e-2 Tanh(4) = 1 - 6.7e-4
+// Logistic(8) = 1 - 3.4e-4 Tanh(8) = 1 - 2.3e-7
+// Logistic(16) = 1 - 1.1e-7 Tanh(16) = 1 - 2.5e-14
+//
+// From this, we see that clamping to [-4, 4] would be too inaccurate
+// (the error of 1.8e-2 on Logistic would be felt even in 8bit precision)
+// while clamping to [-16, 16] would make no difference even in float32.
+// However, for a fixed-point implementation in 16-bit integers, using 5
+// integer bits to represent the [-16, 16] range would leave only 11
+// fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive
+// representable values. Notice that this is higher than the
+// worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic.
+// Using [-8, 8] thus seems like the better compromise overall, enjoying
+// an increment of 2.4e-4 between representable values and a worst-case
+// clamping error of 3.4e-4, both better than the increment of 4.9e-4 with
+// [-16, 16].
+//
+// Moreover, all other things being equal, it is nice to choose the narrower
+// representation range, as that makes the implementation of fixed-point
+// math functions a little cheaper (each integer bit requires an additional
+// barrel-shifter step in the implementation of exp(-x)). That is further
+// reason to prefer [-8, 8] over [-16, 16]. The choice of [-16, 16] would make
+// sense for 32-bit float or 32-bit fixed-point quantization, but we are
+// aiming for 16-bit fixed-point quantization of these internal nodes here.
+//
+// Quantized LSTM cell step: depth-concatenate {input, prev_activ}, run the
+// 8-bit fully-connected node, then evaluate the four gates in 16-bit
+// fixed-point arithmetic (gemmlowp). The fixed-point formats used here are
+// justified in the long quantization-scheme comment above.
+template <int StateIntegerBits>
+inline void
+LstmCell(const LstmCellParams &params, const RuntimeShape &unextended_input_shape,
+         const uint8 *input_data_uint8, const RuntimeShape &unextended_prev_activ_shape,
+         const uint8 *prev_activ_data_uint8, const RuntimeShape &weights_shape,
+         const uint8 *weights_data_uint8, const RuntimeShape &unextended_bias_shape,
+         const int32 *bias_data_int32, const RuntimeShape &unextended_prev_state_shape,
+         const int16 *prev_state_data_int16, const RuntimeShape &unextended_output_state_shape,
+         int16 *output_state_data_int16, const RuntimeShape &unextended_output_activ_shape,
+         uint8 *output_activ_data_uint8, const RuntimeShape &unextended_concat_temp_shape,
+         uint8 *concat_temp_data_uint8, const RuntimeShape &unextended_activ_temp_shape,
+         int16 *activ_temp_data_int16, void *gemmlowp_context)
+{
+  (void)gemmlowp_context; // only used in optimized code.
+  int32 weights_zero_point = params.weights_zero_point;
+  int32 accum_multiplier = params.accum_multiplier;
+  int accum_shift = params.accum_shift;
+  // All shapes are extended to rank 4 so that the Offset()/Dims(3) indexing
+  // below is uniform regardless of the caller-supplied rank.
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
+  const RuntimeShape input_shape = RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const RuntimeShape prev_activ_shape = RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
+  const RuntimeShape bias_shape = RuntimeShape::ExtendedShape(4, unextended_bias_shape);
+  const RuntimeShape prev_state_shape = RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
+  const RuntimeShape output_state_shape =
+    RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
+  const RuntimeShape output_activ_shape =
+    RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
+  const RuntimeShape concat_temp_shape =
+    RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
+  const RuntimeShape activ_temp_shape = RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
+  TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
+
+  // Gather dimensions information, and perform consistency checks.
+  const int weights_dim_count = weights_shape.DimensionsCount();
+  const int outer_size = MatchingFlatSizeSkipDim(input_shape, 3, prev_activ_shape, prev_state_shape,
+                                                 output_state_shape, output_activ_shape);
+  const int input_depth = input_shape.Dims(3);
+  const int prev_activ_depth = prev_activ_shape.Dims(3);
+  const int total_input_depth = prev_activ_depth + input_depth;
+  TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1), total_input_depth);
+  const int intern_activ_depth = MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
+  TFLITE_DCHECK_EQ(weights_shape.FlatSize(), intern_activ_depth * total_input_depth);
+  TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
+  // The fully-connected node produces four gate vectors, hence its depth
+  // must be exactly 4x the output depth.
+  TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
+  const int output_depth = MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
+                                       3, output_activ_shape, 3);
+  TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
+  const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3);
+  const int fc_output_depth =
+    MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3);
+  const int fc_accum_depth = total_input_depth;
+  TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth);
+
+  // Depth-concatenate prev_activ and input data together.
+  uint8 const *concat_input_arrays_data[2] = {input_data_uint8, prev_activ_data_uint8};
+  const RuntimeShape *concat_input_arrays_shapes[2] = {&input_shape, &prev_activ_shape};
+  tflite::ConcatenationParams concat_params;
+  concat_params.axis = 3;
+  concat_params.inputs_count = 2;
+  Concatenation(concat_params, concat_input_arrays_shapes, concat_input_arrays_data,
+                concat_temp_shape, concat_temp_data_uint8);
+
+  // Implementation of the fully connected node inside the LSTM cell.
+  // The operands are 8-bit integers, the accumulators are internally 32bit
+  // integers, and the output is 16-bit fixed-point with 3 integer bits so
+  // the output range is [-2^3, 2^3] == [-8, 8]. The rationale for that
+  // is explained in the function comment above.
+  for (int b = 0; b < fc_batches; ++b)
+  {
+    for (int out_c = 0; out_c < fc_output_depth; ++out_c)
+    {
+      // Internal accumulation.
+      // Initialize accumulator with the bias-value.
+      int32 accum = bias_data_int32[out_c];
+      // Accumulation loop.
+      for (int d = 0; d < fc_accum_depth; ++d)
+      {
+        // Inputs use a fixed zero point of 128 (see the [-1, 127/128]
+        // quantization above); weights use the model-provided zero point.
+        int16 input_val = concat_temp_data_uint8[b * fc_accum_depth + d] - 128;
+        int16 weights_val = weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point;
+        accum += input_val * weights_val;
+      }
+      // Down-scale the final int32 accumulator to the scale used by our
+      // (16-bit, using 3 integer bits) fixed-point format. The quantized
+      // multiplier and shift here have been pre-computed offline
+      // (e.g. by toco).
+      accum = MultiplyByQuantizedMultiplier(accum, accum_multiplier, accum_shift);
+      // Saturate, cast to int16, and store to the temporary activations array.
+      accum = std::max(-32768, std::min(32767, static_cast<int>(accum)));
+      activ_temp_data_int16[out_c + fc_output_depth * b] = accum;
+    }
+  }
+
+  // Rest of the LSTM cell: tanh and logistic math functions, and some adds
+  // and muls, all done in 16-bit fixed-point.
+  for (int b = 0; b < outer_size; ++b)
+  {
+    for (int c = 0; c < output_depth; ++c)
+    {
+      // Define the fixed-point data types that we will use here. All use
+      // int16 as the underlying integer type i.e. all are 16-bit fixed-point.
+      // They only differ by the number of integral vs. fractional bits,
+      // determining the range of values that they can represent.
+      //
+      // F0 uses 0 integer bits, range [-1, 1].
+      // This is the return type of math functions such as tanh, logistic,
+      // whose range is in [-1, 1].
+      using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
+      // F3 uses 3 integer bits, range [-8, 8].
+      // This is the range of the previous fully-connected node's output,
+      // which is our input here.
+      using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
+      // FS uses StateIntegerBits integer bits, range [-2^StateIntegerBits,
+      // 2^StateIntegerBits]. It's used to represent the internal state, whose
+      // number of integer bits is currently dictated by the model. See comment
+      // on the StateIntegerBits template parameter above.
+      using FS = gemmlowp::FixedPoint<std::int16_t, StateIntegerBits>;
+      // Implementation of input gate, using fixed-point logistic function.
+      F3 input_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 0 * output_depth + c]);
+      F0 input_gate_output = gemmlowp::logistic(input_gate_input);
+      // Implementation of input modulation gate, using fixed-point tanh
+      // function.
+      F3 input_modulation_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 1 * output_depth + c]);
+      F0 input_modulation_gate_output = gemmlowp::tanh(input_modulation_gate_input);
+      // Implementation of forget gate, using fixed-point logistic function.
+      F3 forget_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 2 * output_depth + c]);
+      F0 forget_gate_output = gemmlowp::logistic(forget_gate_input);
+      // Implementation of output gate, using fixed-point logistic function.
+      F3 output_gate_input =
+        F3::FromRaw(activ_temp_data_int16[b * fc_output_depth + 3 * output_depth + c]);
+      F0 output_gate_output = gemmlowp::logistic(output_gate_input);
+      // Implementation of internal multiplication nodes, still in fixed-point.
+      F0 input_times_input_modulation = input_gate_output * input_modulation_gate_output;
+      FS prev_state = FS::FromRaw(prev_state_data_int16[b * output_depth + c]);
+      FS prev_state_times_forget_state = forget_gate_output * prev_state;
+      // Implementation of internal addition node, saturating.
+      FS new_state =
+        gemmlowp::SaturatingAdd(gemmlowp::Rescale<StateIntegerBits>(input_times_input_modulation),
+                                prev_state_times_forget_state);
+      // Implementation of last internal Tanh node, still in fixed-point.
+      // Since a Tanh fixed-point implementation is specialized for a given
+      // number or integer bits, and each specialization can have a substantial
+      // code size, and we already used above a Tanh on an input with 3 integer
+      // bits, and per the table in the above function comment there is no
+      // significant accuracy to be lost by clamping to [-8, +8] for a
+      // 3-integer-bits representation, let us just do that. This helps people
+      // porting this to targets where code footprint must be minimized.
+      F3 new_state_f3 = gemmlowp::Rescale<3>(new_state);
+      F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3);
+      // Store the new internal state back to memory, as 16-bit integers.
+      // Note: here we store the original value with StateIntegerBits, not
+      // the rescaled 3-integer-bits value fed to tanh.
+      output_state_data_int16[b * output_depth + c] = new_state.raw();
+      // Down-scale the output activations to 8-bit integers, saturating,
+      // and store back to memory. Dividing the raw F0 value (15 fractional
+      // bits) by 2^8 maps [-1, 1) onto [-128, 128), which is then clamped
+      // and re-centered around the uint8 zero point of 128.
+      int16 rescaled_output_activ = gemmlowp::RoundingDivideByPOT(output_activ_int16.raw(), 8);
+      int16 clamped_output_activ =
+        std::max<int16>(-128, std::min<int16>(127, rescaled_output_activ));
+      output_activ_data_uint8[b * output_depth + c] = 128 + clamped_output_activ;
+    }
+  }
+}
+
+// Splits 'input_data' into 'params.num_split' outputs along 'params.axis'
+// (a negative axis counts from the back). All non-axis dimensions of every
+// output must match the input, and the per-output axis extents must sum to
+// the input's axis extent. Data is copied as contiguous inner slices.
+template <typename Scalar>
+void Split(const SplitParams &params, const RuntimeShape &input_shape, const Scalar *input_data,
+           const RuntimeShape *const *output_shapes, Scalar *const *output_data)
+{
+  ruy::profiler::ScopeLabel label("Split");
+  const int split_dimensions = input_shape.DimensionsCount();
+  // Normalize a negative axis to its positive equivalent.
+  int axis = params.axis < 0 ? params.axis + split_dimensions : params.axis;
+  int outputs_count = params.num_split;
+  TFLITE_DCHECK_LT(axis, split_dimensions);
+
+  // Validate output shapes against the input and total the axis extents.
+  int64_t split_size = 0;
+  for (int i = 0; i < outputs_count; i++)
+  {
+    TFLITE_DCHECK_EQ(output_shapes[i]->DimensionsCount(), split_dimensions);
+    for (int j = 0; j < split_dimensions; j++)
+    {
+      if (j != axis)
+      {
+        MatchingDim(*output_shapes[i], j, input_shape, j);
+      }
+    }
+    split_size += output_shapes[i]->Dims(axis);
+  }
+  TFLITE_DCHECK_EQ(split_size, input_shape.Dims(axis));
+  int64_t outer_size = 1;
+  for (int i = 0; i < axis; ++i)
+  {
+    outer_size *= input_shape.Dims(i);
+  }
+  // For all output arrays,
+  // FlatSize() = outer_size * Dims(axis) * base_inner_size;
+  int64_t base_inner_size = 1;
+  for (int i = axis + 1; i < split_dimensions; ++i)
+  {
+    base_inner_size *= input_shape.Dims(i);
+  }
+
+  // Walk the input once; for each outer index, hand each output its
+  // contiguous chunk of (axis extent * inner size) elements.
+  const Scalar *input_ptr = input_data;
+  for (int k = 0; k < outer_size; k++)
+  {
+    for (int i = 0; i < outputs_count; ++i)
+    {
+      const int copy_size = output_shapes[i]->Dims(axis) * base_inner_size;
+      memcpy(output_data[i] + k * copy_size, input_ptr, copy_size * sizeof(Scalar));
+      input_ptr += copy_size;
+    }
+  }
+}
+
+// Flat index of element (b, h, w) in a [batches, height, width] layout.
+inline int NodeOffset(int b, int h, int w, int height, int width)
+{
+  return (b * height + h) * width + w;
+}
+
+// Local Response Normalization over the innermost (channel) dimension:
+// each output value is the input scaled by
+// (bias + alpha * sum-of-squares over a channel window) ^ (-beta).
+inline void LocalResponseNormalization(const tflite::LocalResponseNormalizationParams &op_params,
+                                       const RuntimeShape &input_shape, const float *input_data,
+                                       const RuntimeShape &output_shape, float *output_data)
+{
+  const int trailing_dim = input_shape.DimensionsCount() - 1;
+  const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+  const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+
+  for (int i = 0; i < outer_size; ++i)
+  {
+    for (int c = 0; c < depth; ++c)
+    {
+      // Channel window is [max(0, c - range), min(depth, c + range));
+      // note the upper bound is exclusive as written.
+      const int begin_input_c = std::max(0, static_cast<int>(c - op_params.range));
+      const int end_input_c = std::min(depth, static_cast<int>(c + op_params.range));
+      float accum = 0.f;
+      for (int input_c = begin_input_c; input_c < end_input_c; ++input_c)
+      {
+        const float input_val = input_data[i * depth + input_c];
+        accum += input_val * input_val;
+      }
+      const float multiplier = std::pow(op_params.bias + op_params.alpha * accum, -op_params.beta);
+      output_data[i * depth + c] = input_data[i * depth + c] * multiplier;
+    }
+  }
+}
+
+// Element-wise widening of half-precision (fp16) values to float32.
+// Despite the name, this overload applies no scale/zero-point: it is a
+// plain per-element cast.
+inline void Dequantize(const RuntimeShape &input_shape, const Eigen::half *input_data,
+                       const RuntimeShape &output_shape, float *output_data)
+{
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  for (int i = 0; i < flat_size; i++)
+  {
+    output_data[i] = static_cast<float>(input_data[i]);
+  }
+}
+
+// Fake-quantizes 'input_data': nudges the [min, max] range so that zero is
+// exactly representable with 'num_bits' levels, then rounds every value
+// onto that quantized grid. The output remains float.
+inline void FakeQuant(const tflite::FakeQuantParams &op_params, const RuntimeShape &input_shape,
+                      const float *input_data, const RuntimeShape &output_shape, float *output_data)
+{
+  ruy::profiler::ScopeLabel label("FakeQuant");
+  float rmin = op_params.minmax.min;
+  float rmax = op_params.minmax.max;
+  int num_bits = op_params.num_bits;
+  // 0 should always be a representable value. Let's assume that the initial
+  // min,max range contains 0.
+  TFLITE_DCHECK_LE(rmin, 0.0f);
+  TFLITE_DCHECK_GE(rmax, 0.0f);
+  TFLITE_DCHECK_LT(rmin, rmax);
+
+  // Code matches tensorflow's FakeQuantWithMinMaxArgsFunctor.
+  int quant_min = 0;
+  int quant_max = (1 << num_bits) - 1;
+  float nudged_min, nudged_max, nudged_scale;
+  NudgeQuantizationRange(rmin, rmax, quant_min, quant_max, &nudged_min, &nudged_max, &nudged_scale);
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+  FakeQuantizeArray(nudged_scale, nudged_min, nudged_max, input_data, output_data, flat_size);
+}
+
+// Common subroutine for both `GatherNd` and `GatherNdString`.
+struct GatherNdHelperResult
+{
+  int n_slices;   // number of slices to gather (product of leading index dims)
+  int slice_size; // elements per gathered slice (product of trailing params dims)
+  int indices_nd; // length of one index tuple (last dim of the indices tensor)
+  std::vector<int> dims_to_count; // flat stride of each indexed params dimension
+};
+
+// Returns common values being used on both `GatherNd` and `GatherNdString`.
+inline GatherNdHelperResult GatherNdHelper(const RuntimeShape &params_shape,
+                                           const RuntimeShape &indices_shape)
+{
+  GatherNdHelperResult ret;
+  ret.n_slices = 1;
+  ret.slice_size = 1;
+  const int indices_dims = indices_shape.DimensionsCount();
+  // The last indices dimension holds one multi-dimensional index per slice.
+  ret.indices_nd = indices_shape.Dims(indices_dims - 1);
+  const int params_dims = params_shape.DimensionsCount();
+  // One slice per index tuple (product of all leading indices dims)...
+  for (int i = 0; i < indices_dims - 1; ++i)
+  {
+    ret.n_slices *= indices_shape.Dims(i);
+  }
+  // ...and each slice spans the params dimensions not consumed by the index.
+  for (int i = ret.indices_nd; i < params_dims; ++i)
+  {
+    ret.slice_size *= params_shape.Dims(i);
+  }
+
+  // Flat strides of the indexed params dimensions, computed outermost-first
+  // by repeatedly dividing out each dimension's extent.
+  int remain_flat_size = params_shape.FlatSize();
+  ret.dims_to_count = std::vector<int>(ret.indices_nd, 0);
+  for (int i = 0; i < ret.indices_nd; ++i)
+  {
+    ret.dims_to_count[i] = remain_flat_size / params_shape.Dims(i);
+    remain_flat_size = ret.dims_to_count[i];
+  }
+
+  return ret;
+}
+
+// Gathers the slices of 'params_data' addressed by the index tuples in
+// 'indices_data', packing them contiguously into 'output_data'.
+template <typename ParamsT, typename IndicesT = int32>
+inline void GatherNd(const RuntimeShape &params_shape, const ParamsT *params_data,
+                     const RuntimeShape &indices_shape, const IndicesT *indices_data,
+                     const RuntimeShape &output_shape, ParamsT *output_data)
+{
+  ruy::profiler::ScopeLabel label("GatherNd");
+
+  const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
+  for (int i = 0; i < res.n_slices; ++i)
+  {
+    // Resolve the i-th index tuple into a flat source offset.
+    int from_pos = 0;
+    for (int j = 0; j < res.indices_nd; ++j)
+    {
+      from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j];
+    }
+    std::memcpy(output_data + i * res.slice_size, params_data + from_pos,
+                sizeof(ParamsT) * res.slice_size);
+  }
+}
+
+#ifndef TF_LITE_STATIC_MEMORY
+// String-tensor flavor of GatherNd: gathered strings are accumulated into a
+// DynamicBuffer and written back to the output tensor in one shot (hence
+// unavailable in static-memory builds).
+template <typename IndicesT = int32>
+inline void GatherNdString(const RuntimeShape &params_shape, const TfLiteTensor *params_data,
+                           const RuntimeShape &indices_shape, const IndicesT *indices_data,
+                           const RuntimeShape &output_shape, TfLiteTensor *output_data)
+{
+  ruy::profiler::ScopeLabel label("GatherNdString");
+
+  const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape);
+  DynamicBuffer buffer;
+  for (int i = 0; i < res.n_slices; ++i)
+  {
+    // Resolve the i-th index tuple into a flat source offset.
+    int from_pos = 0;
+    for (int j = 0; j < res.indices_nd; ++j)
+    {
+      from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j];
+    }
+    for (int j = 0; j < res.slice_size; ++j)
+    {
+      buffer.AddString(GetString(params_data, from_pos + j));
+    }
+  }
+  buffer.WriteToTensor(output_data, /*new_shape=*/nullptr);
+}
+#endif
+
+// Scatters 'updates_data' slices into a zero-initialized output at the
+// positions given by 'indices_data'. Duplicate indices accumulate (+=)
+// rather than overwrite.
+template <typename IndicesT, typename UpdatesT>
+inline void ScatterNd(const RuntimeShape &indices_shape, const IndicesT *indices_data,
+                      const RuntimeShape &updates_shape, const UpdatesT *updates_data,
+                      const RuntimeShape &output_shape, UpdatesT *output_data)
+{
+  ruy::profiler::ScopeLabel label("ScatterNd");
+
+  int n_slices = 1;
+  int slice_size = 1;
+  const int outer_dims = indices_shape.DimensionsCount() - 1;
+  // The last indices dimension holds one multi-dimensional index per slice.
+  const int indices_nd = indices_shape.Dims(outer_dims);
+  const int updates_dims = updates_shape.DimensionsCount();
+  for (int i = 0; i < outer_dims; ++i)
+  {
+    n_slices *= indices_shape.Dims(i);
+  }
+  for (int i = outer_dims; i < updates_dims; ++i)
+  {
+    slice_size *= updates_shape.Dims(i);
+  }
+
+  // Flat strides of the indexed output dimensions, computed outermost-first.
+  int output_flat_size = output_shape.FlatSize();
+  int remain_flat_size = output_flat_size;
+  std::vector<int> dims_to_count(indices_nd, 0);
+  for (int i = 0; i < indices_nd; ++i)
+  {
+    dims_to_count[i] = remain_flat_size / output_shape.Dims(i);
+    remain_flat_size = dims_to_count[i];
+  }
+
+  // Zero-fill, then accumulate each update slice at its target offset.
+  memset(output_data, 0, sizeof(UpdatesT) * output_flat_size);
+  for (int i = 0; i < n_slices; ++i)
+  {
+    int to_pos = 0;
+    for (int j = 0; j < indices_nd; ++j)
+    {
+      IndicesT idx = indices_data[i * indices_nd + j];
+      TFLITE_DCHECK(0 <= idx && idx < output_shape.Dims(j));
+      to_pos += idx * dims_to_count[j];
+    }
+    for (int j = 0; j < slice_size; j++)
+    {
+      output_data[to_pos + j] += updates_data[i * slice_size + j];
+    }
+  }
+}
+
+// Core Slice implementation: the input shape is front-padded to rank 5,
+// then the selected [start, stop) window is streamed through 'writer' in
+// row-major order. A size entry of -1 means "to the end of that dimension".
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+                  const RuntimeShape &output_shape, SequentialTensorWriter<T> *writer)
+{
+  const RuntimeShape ext_shape = RuntimeShape::ExtendedShape(5, input_shape);
+  TFLITE_DCHECK_LE(op_params.begin_count, 5);
+  TFLITE_DCHECK_LE(op_params.size_count, 5);
+  const int begin_count = op_params.begin_count;
+  const int size_count = op_params.size_count;
+  // We front-pad the begin and size vectors.
+  std::array<int, 5> start;
+  std::array<int, 5> stop;
+  for (int i = 0; i < 5; ++i)
+  {
+    // Dimensions not covered by begin/size take their full extent.
+    int padded_i = 5 - i;
+    start[i] = begin_count < padded_i ? 0 : op_params.begin[begin_count - padded_i];
+    stop[i] = (size_count < padded_i || op_params.size[size_count - padded_i] == -1)
+                ? ext_shape.Dims(i)
+                : start[i] + op_params.size[size_count - padded_i];
+  }
+
+  for (int i0 = start[0]; i0 < stop[0]; ++i0)
+  {
+    for (int i1 = start[1]; i1 < stop[1]; ++i1)
+    {
+      for (int i2 = start[2]; i2 < stop[2]; ++i2)
+      {
+        for (int i3 = start[3]; i3 < stop[3]; ++i3)
+        {
+          for (int i4 = start[4]; i4 < stop[4]; ++i4)
+          {
+            writer->Write(Offset(ext_shape, i0, i1, i2, i3, i4));
+          }
+        }
+      }
+    }
+  }
+}
+
+// Convenience overload slicing directly from 'input_data' into 'output_data'.
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+                  const T *input_data, const RuntimeShape &output_shape, T *output_data)
+{
+  SequentialTensorWriter<T> writer(input_data, output_data);
+  return Slice(op_params, input_shape, output_shape, &writer);
+}
+
+// Convenience overload slicing between TfLiteTensor objects (used e.g. for
+// string tensors, where a tensor-aware writer is required).
+template <typename T>
+inline void Slice(const tflite::SliceParams &op_params, const RuntimeShape &input_shape,
+                  const TfLiteTensor *input, const RuntimeShape &output_shape, TfLiteTensor *output)
+{
+  SequentialTensorWriter<T> writer(input, output);
+  return Slice(op_params, input_shape, output_shape, &writer);
+}
+
+// Element-wise minimum against a scalar: only input2_data[0] is read, and
+// each output element is min(input1[i], that scalar).
+template <typename T>
+void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data,
+             const RuntimeShape &output_shape, T *output_data)
+{
+  const int flat_size = MatchingFlatSize(input1_shape, output_shape);
+
+  auto min_value = input2_data[0];
+  for (int i = 0; i < flat_size; i++)
+  {
+    output_data[i] = input1_data[i] > min_value ? min_value : input1_data[i];
+  }
+}
+
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops. The second input is treated as a scalar, so
+// its shape carries no information here.
+template <typename T>
+inline void Minimum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &,
+                    const T *input2_data, const RuntimeShape &output_shape, T *output_data)
+{
+  // Drop shape of second input: not needed.
+  Minimum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+// Element-wise maximum against a scalar: only input2_data[0] is read, and
+// each output element is max(input1[i], that scalar).
+template <typename T>
+void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const T *input2_data,
+             const RuntimeShape &output_shape, T *output_data)
+{
+  const int flat_size = MatchingFlatSize(input1_shape, output_shape);
+
+  auto max_value = input2_data[0];
+  for (int i = 0; i < flat_size; i++)
+  {
+    output_data[i] = input1_data[i] < max_value ? max_value : input1_data[i];
+  }
+}
+
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops. The second input is treated as a scalar, so
+// its shape carries no information here.
+template <typename T>
+inline void Maximum(const RuntimeShape &input1_shape, const T *input1_data, const RuntimeShape &,
+                    const T *input2_data, const RuntimeShape &output_shape, T *output_data)
+{
+  // Drop shape of second input: not needed.
+  Maximum(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+// ArgMax implemented by delegating to the shared ArgMinMax with a
+// std::greater comparator. input2_data is forwarded unchanged (presumably
+// the axis tensor — see ArgMinMax for its exact semantics).
+template <typename T1, typename T2, typename T3>
+void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data, const T3 *input2_data,
+            const RuntimeShape &output_shape, T2 *output_data)
+{
+  ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data, std::greater<T1>());
+}
+
+// Convenience version that allows, for example, generated-code calls to be
+// the same as other binary ops. The second input's shape is ignored.
+template <typename T1, typename T2, typename T3>
+inline void ArgMax(const RuntimeShape &input1_shape, const T1 *input1_data,
+                   const RuntimeShape &input2_shape, const T3 *input2_data,
+                   const RuntimeShape &output_shape, T2 *output_data)
+{
+  // Drop shape of second input: not needed.
+  ArgMax(input1_shape, input1_data, input2_data, output_shape, output_data);
+}
+
+// Element-wise select: output[i] = condition[i] ? x[i] : y[i]. All four
+// shapes must match, except for the special case where every tensor has
+// exactly one element (mixed scalar / one-element tensors are allowed).
+template <typename D, typename T>
+void Select(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+            const RuntimeShape &input_x_shape, const T *input_x_data,
+            const RuntimeShape &input_y_shape, const T *input_y_data,
+            const RuntimeShape &output_shape, T *output_data)
+{
+  int64_t flatsize;
+  // Allow select operator executions on mixed scalar tensors and one element
+  // tensors.
+  if (input_condition_shape.FlatSize() == 1 && input_x_shape.FlatSize() == 1 &&
+      input_y_shape.FlatSize() == 1 && output_shape.FlatSize() == 1)
+  {
+    flatsize = 1;
+  }
+  else
+  {
+    flatsize = MatchingFlatSize(input_condition_shape, input_x_shape, input_y_shape, output_shape);
+  }
+  for (int64_t i = 0; i < flatsize; ++i)
+  {
+    output_data[i] = input_condition_data[i] ? input_x_data[i] : input_y_data[i];
+  }
+}
+
+// Select where the condition has at most rank 1: each condition element
+// chooses an entire inner slice from either x or y, copied with memcpy.
+template <typename D, typename T>
+void RankOneSelect(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+                   const RuntimeShape &input_x_shape, const T *input_x_data,
+                   const RuntimeShape &input_y_shape, const T *input_y_data,
+                   const RuntimeShape &output_shape, T *output_data)
+{
+  const int64_t outer_size = input_condition_shape.FlatSize();
+  int64_t inner_size;
+  if (input_condition_shape.DimensionsCount() == 0)
+  {
+    // Rank-0 condition: its single value selects the whole tensor.
+    inner_size = MatchingFlatSize(input_x_shape, input_y_shape, output_shape);
+  }
+  else
+  {
+    TFLITE_DCHECK_EQ(MatchingDim(input_x_shape, 0, input_y_shape, 0, output_shape, 0), outer_size);
+    inner_size = MatchingFlatSizeSkipDim(input_x_shape, 0, input_y_shape, output_shape);
+  }
+
+  int64_t offset = 0;
+  for (int64_t i = 0; i < outer_size; i++)
+  {
+    const T *input_data = input_condition_data[i] ? input_x_data : input_y_data;
+    memcpy(output_data + offset, input_data + offset, inner_size * sizeof(T));
+    offset += inner_size;
+  }
+}
+
+// Broadcasting select on up-to-rank-4 tensors: condition, x, and y are
+// broadcast against each other element-by-element, and each output element
+// picks from x or y according to the broadcast condition value.
+template <typename D, typename T>
+void BroadcastSelect4DSlow(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+                           const RuntimeShape &input_x_shape, const T *input_x_data,
+                           const RuntimeShape &input_y_shape, const T *input_y_data,
+                           const RuntimeShape &output_shape, T *output_data)
+{
+  TFLITE_DCHECK_LE(input_condition_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(input_x_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(input_y_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(output_shape.DimensionsCount(), 4);
+
+  const RuntimeShape extended_output_shape = RuntimeShape::ExtendedShape(4, output_shape);
+
+  NdArrayDesc<4> desc_condition;
+  NdArrayDesc<4> desc_x;
+  NdArrayDesc<4> desc_y;
+  NdArrayDescsForElementwiseBroadcast(input_condition_shape, input_x_shape, input_y_shape,
+                                      &desc_condition, &desc_x, &desc_y);
+
+  // In Tensorflow, the dimensions are canonically named (batch_number, row,
+  // col, channel), with extents (batches, height, width, depth), with the
+  // trailing dimension changing most rapidly (channels has the smallest
+  // stride, typically 1 element).
+  //
+  // In generated C code, we store arrays with the dimensions reversed. The
+  // first dimension has smallest stride.
+  //
+  // We name our variables by their Tensorflow convention, but generate C code
+  // nesting loops such that the innermost loop has the smallest stride for
+  // the best cache behavior.
+  for (int b = 0; b < extended_output_shape.Dims(0); ++b)
+  {
+    for (int y = 0; y < extended_output_shape.Dims(1); ++y)
+    {
+      for (int x = 0; x < extended_output_shape.Dims(2); ++x)
+      {
+        for (int c = 0; c < extended_output_shape.Dims(3); ++c)
+        {
+          const int condition_index = SubscriptToIndex(desc_condition, b, y, x, c);
+          const int x_index = SubscriptToIndex(desc_x, b, y, x, c);
+          const int y_index = SubscriptToIndex(desc_y, b, y, x, c);
+          output_data[Offset(extended_output_shape, b, y, x, c)] =
+            input_condition_data[condition_index] ? input_x_data[x_index] : input_y_data[y_index];
+        }
+      }
+    }
+  }
+}
+
+template <typename D, typename T>
+void SelectTrueCoords(const RuntimeShape &input_condition_shape, const D *input_condition_data,
+ T *output_data)
+{
+ const size_t size = input_condition_shape.FlatSize();
+ if (size == 0)
+ {
+ // Dimension is zero, in which case we don't need to output.
+ return;
+ }
+ const size_t cond_rank = input_condition_shape.DimensionsCount();
+
+ std::vector<int> dims_to_count(cond_rank, 0);
+ int cur_flat_size = size;
+ for (int i = 0; i < cond_rank; ++i)
+ {
+ dims_to_count[i] = cur_flat_size / input_condition_shape.Dims(i);
+ cur_flat_size = dims_to_count[i];
+ }
+
+ int output_index = 0;
+ for (int i = 0; i < size; ++i)
+ {
+ if (input_condition_data[i])
+ {
+ // Insert the coordinate of the current item (row major) into output.
+ int flat_index = i;
+ for (int j = 0; j < cond_rank; ++j)
+ {
+ int coord_j = flat_index / dims_to_count[j];
+ output_data[output_index * cond_rank + j] = coord_j;
+ flat_index %= dims_to_count[j];
+ }
+ output_index++;
+ }
+ }
+}
+
+// For easy implementation, the indices is always a vector of size-4 vectors.
+template <typename T, typename TI>
+inline void SparseToDense(const std::vector<std::vector<TI>> &indices, const T *values,
+ T default_value, bool value_is_scalar,
+ const RuntimeShape &unextended_output_shape, T *output_data)
+{
+ TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+ const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
+ const int value_count = indices.size();
+
+ // First fill the output_data with default value.
+ const int num_elements = output_shape.FlatSize();
+ for (int i = 0; i < num_elements; ++i)
+ {
+ output_data[i] = default_value;
+ }
+
+ // Special handle for value is scalar case to avoid checking the boolean
+ // condition within the loop every time.
+ if (value_is_scalar)
+ {
+ for (int i = 0; i < value_count; ++i)
+ {
+ const std::vector<TI> &index = indices[i];
+ TFLITE_DCHECK_EQ(index.size(), 4);
+ const T value = *values; // just use the first value.
+ output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value;
+ }
+ return;
+ }
+
+ // Go through the values and indices to fill the sparse values.
+ for (int i = 0; i < value_count; ++i)
+ {
+ const std::vector<TI> &index = indices[i];
+ TFLITE_DCHECK_EQ(index.size(), 4);
+ const T value = values[i];
+ output_data[Offset(output_shape, index[0], index[1], index[2], index[3])] = value;
+ }
+}
+
+template <typename T>
+inline void Pow(const RuntimeShape &input1_shape, const T *input1_data,
+ const RuntimeShape &input2_shape, const T *input2_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int flat_size = MatchingFlatSize(input1_shape, input2_shape, output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ output_data[i] = std::pow(input1_data[i], input2_data[i]);
+ }
+}
+
+template <typename T>
+inline void BroadcastPow4DSlow(const RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const RuntimeShape &unextended_input2_shape, const T *input2_data,
+ const RuntimeShape &unextended_output_shape, T *output_data)
+{
+ TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
+ TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+ const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+ NdArrayDesc<4> desc1;
+ NdArrayDesc<4> desc2;
+ NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, &desc1,
+ &desc2);
+
+ for (int b = 0; b < output_shape.Dims(0); ++b)
+ {
+ for (int y = 0; y < output_shape.Dims(1); ++y)
+ {
+ for (int x = 0; x < output_shape.Dims(2); ++x)
+ {
+ for (int c = 0; c < output_shape.Dims(3); ++c)
+ {
+ auto out_idx = Offset(output_shape, b, y, x, c);
+ auto in1_idx = SubscriptToIndex(desc1, b, y, x, c);
+ auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
+ auto in1_val = input1_data[in1_idx];
+ auto in2_val = input2_data[in2_idx];
+ output_data[out_idx] = std::pow(in1_val, in2_val);
+ }
+ }
+ }
+ }
+}
+
+template <typename Scalar>
+void Reverse(int axis, const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape &output_shape, Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("Reverse");
+
+ int outer_size = 1;
+ for (int i = 0; i < axis; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+
+ int copy_size = 1;
+ for (int i = axis + 1; i < input_shape.DimensionsCount(); ++i)
+ {
+ copy_size *= input_shape.Dims(i);
+ }
+
+ const int dims_at_axis = input_shape.Dims(axis);
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int j = 0; j < dims_at_axis; ++j)
+ {
+ const int start_pos = (i * dims_at_axis + j) * copy_size;
+ Scalar *output_ptr = output_data + start_pos;
+ int loc = (i * dims_at_axis + dims_at_axis - j - 1) * copy_size;
+ memcpy(output_ptr, input_data + loc, copy_size * sizeof(Scalar));
+ }
+ }
+}
+
+template <typename Scalar, typename TS>
+void ReverseSequence(const TS *seq_lengths, const int seq_dim, const int batch_dim,
+ const RuntimeShape &input_shape, const Scalar *input_data,
+ const RuntimeShape &output_shape, Scalar *output_data)
+{
+ ruy::profiler::ScopeLabel label("ReverseSequence");
+
+ int outer_size = 1;
+ int outer_dim = std::min(batch_dim, seq_dim);
+ int medium_dim = std::max(batch_dim, seq_dim);
+ for (int i = 0; i < outer_dim; ++i)
+ {
+ outer_size *= input_shape.Dims(i);
+ }
+
+ int medium_size = 1;
+ for (int i = outer_dim + 1; i < medium_dim; ++i)
+ {
+ medium_size *= input_shape.Dims(i);
+ }
+
+ int copy_size = 1;
+ for (int i = medium_dim + 1; i < input_shape.DimensionsCount(); ++i)
+ {
+ copy_size *= input_shape.Dims(i);
+ }
+
+ const int dims_at_outer_dim = input_shape.Dims(outer_dim);
+ const int dims_at_medium_dim = input_shape.Dims(medium_dim);
+
+ Scalar *output_ptr;
+ if (batch_dim > seq_dim)
+ {
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int j = 0; j < dims_at_outer_dim; ++j)
+ {
+ const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ for (int p = 0; p < medium_size; ++p)
+ {
+ for (int q = 0; q < dims_at_medium_dim; ++q)
+ {
+ const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ const Scalar *in_ptr = input_data + in_pos;
+ int sl = seq_lengths[q] - 1;
+ if (j > sl)
+ {
+ output_ptr = output_data + in_pos;
+ }
+ else
+ {
+ const int out_pos_base = (i * dims_at_outer_dim + sl - j) * medium_size;
+ const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ output_ptr = output_data + out_pos;
+ }
+ memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
+ }
+ }
+ }
+ }
+ }
+ else if (batch_dim < seq_dim)
+ {
+ for (int i = 0; i < outer_size; ++i)
+ {
+ for (int j = 0; j < dims_at_outer_dim; ++j)
+ {
+ const int in_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ int sl = seq_lengths[j] - 1;
+ const int out_pos_base = (i * dims_at_outer_dim + j) * medium_size;
+ for (int p = 0; p < medium_size; ++p)
+ {
+ for (int q = 0; q < dims_at_medium_dim; ++q)
+ {
+ const int in_pos = ((in_pos_base + p) * dims_at_medium_dim + q) * copy_size;
+ const Scalar *in_ptr = input_data + in_pos;
+ if (q > sl)
+ {
+ output_ptr = output_data + in_pos;
+ }
+ else
+ {
+ const int out_pos = ((out_pos_base + p) * dims_at_medium_dim + sl - q) * copy_size;
+ output_ptr = output_data + out_pos;
+ }
+ memcpy(output_ptr, in_ptr, copy_size * sizeof(Scalar));
+ }
+ }
+ }
+ }
+ }
+}
+
+template <typename T>
+inline void SegmentSum(const RuntimeShape &input_shape, const T *input_data,
+ const RuntimeShape &segment_ids_shape, const int32_t *segment_ids_data,
+ const RuntimeShape &output_shape, T *output_data)
+{
+ const int segment_flat_size = MatchingFlatSizeSkipDim(input_shape, 0, output_shape);
+
+ memset(output_data, 0, sizeof(T) * output_shape.FlatSize());
+
+ for (int i = 0; i < input_shape.Dims(0); i++)
+ {
+ int output_index = segment_ids_data[i];
+ for (int j = 0; j < segment_flat_size; ++j)
+ {
+ output_data[output_index * segment_flat_size + j] += input_data[i * segment_flat_size + j];
+ }
+ }
+}
+
+} // namespace reference_ops
+} // namespace tflite
+
+#endif // LUCI_INTERPRETER_PAL_REFERENCE_OPS_H
diff --git a/compiler/luci-interpreter/src/core/KernelParams.h b/compiler/luci-interpreter/src/core/KernelParams.h
index 958fd4b74..6c0220c62 100644
--- a/compiler/luci-interpreter/src/core/KernelParams.h
+++ b/compiler/luci-interpreter/src/core/KernelParams.h
@@ -170,6 +170,11 @@ struct ResizeNearestNeighborParams
bool half_pixel_centers;
};
+struct ShapeParams
+{
+ loco::DataType out_type;
+};
+
struct SubParams
{
Activation activation;
diff --git a/compiler/luci-interpreter/src/kernels/Fill.cpp b/compiler/luci-interpreter/src/kernels/Fill.cpp
new file mode 100644
index 000000000..e09d6331a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Fill.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Fill.h"
+#include "kernels/Utils.h"
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Fill::Fill(const Tensor *dims, const Tensor *value, Tensor *output)
+ : Kernel({dims, value}, {output})
+{
+}
+
+template <typename T> void Fill::configureShape()
+{
+ const auto dims_data = getTensorData<T>(dims());
+ Shape output_shape(dims()->shape().dim(0));
+
+ for (int i = 0; i < output_shape.num_dims(); ++i)
+ {
+ T data = dims_data[i];
+ if (data < 0)
+ throw std::runtime_error("Fill dimensions must be >= 0");
+
+ output_shape.dim(i) = data;
+ }
+
+ output()->resize(output_shape);
+}
+
+void Fill::configure()
+{
+ const auto dims_shape = dims()->shape();
+ const auto value_shape = value()->shape();
+
+ // Make sure the 1st input tensor is 1-D
+ LUCI_INTERPRETER_CHECK(dims_shape.num_dims() == 1);
+
+ // Make sure the 1st input tensor is int32 or int64
+ LUCI_INTERPRETER_CHECK(dims()->element_type() == DataType::S32 or
+ dims()->element_type() == DataType::S64);
+
+ // Make sure the 2nd input tensor is a scalar
+  LUCI_INTERPRETER_CHECK(value_shape.num_dims() == 0);
+
+ // Check zero point and scale for S16 and S8
+ if (value()->element_type() == loco::DataType::S16 or
+ value()->element_type() == loco::DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(value()->scale() == output()->scale());
+ LUCI_INTERPRETER_CHECK(value()->zero_point() == output()->zero_point());
+
+ if (value()->element_type() == loco::DataType::S16)
+ LUCI_INTERPRETER_CHECK(value()->zero_point() == 0);
+ }
+ // Resize output
+ switch (dims()->element_type())
+ {
+ case DataType::S32:
+ configureShape<int32_t>();
+ break;
+ case DataType::S64:
+ configureShape<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Fill::execute() const
+{
+ switch (output()->element_type())
+ {
+ case DataType::S8:
+ tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int8_t>(value()),
+ getTensorShape(output()), getTensorData<int8_t>(output()));
+ break;
+ case DataType::S16:
+ tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int16_t>(value()),
+ getTensorShape(output()), getTensorData<int16_t>(output()));
+ break;
+ case DataType::S32:
+ tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int32_t>(value()),
+ getTensorShape(output()), getTensorData<int32_t>(output()));
+ break;
+ case DataType::S64:
+ tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int64_t>(value()),
+ getTensorShape(output()), getTensorData<int64_t>(output()));
+ break;
+ case DataType::FLOAT32:
+ tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<float>(value()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Fill.h b/compiler/luci-interpreter/src/kernels/Fill.h
new file mode 100644
index 000000000..184f0cb83
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Fill.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_FILL_H
+#define LUCI_INTERPRETER_KERNELS_FILL_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Fill : public Kernel
+{
+public:
+ Fill(const Tensor *dims, const Tensor *value, Tensor *output);
+
+ const Tensor *dims() const { return _inputs[0]; }
+ const Tensor *value() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void configureShape();
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_FILL_H
diff --git a/compiler/luci-interpreter/src/kernels/Fill.test.cpp b/compiler/luci-interpreter/src/kernels/Fill.test.cpp
new file mode 100644
index 000000000..cf56df507
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Fill.test.cpp
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Fill.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class FillTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+template <typename T, DataType DT> void runFillIntKernel(IMemoryManager *memory_manager)
+{
+ Shape dims_shape{2};
+
+ std::vector<int32_t> dims_data = {2, 3};
+ std::vector<T> value_data = {5};
+
+ Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, memory_manager);
+ Tensor value = makeInputTensor<DT>(/*scalar*/ {}, value_data, memory_manager);
+
+ Tensor output_tensor = makeOutputTensor(DT);
+
+ Fill kernel(&dims, &value, &output_tensor);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<T> ref_output_data{5, 5, 5, 5, 5, 5};
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ref_output_data);
+
+ std::vector<int32_t> ref_output_shape{2, 3};
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+template <DataType DT> void runFillQuantIntKernel(IMemoryManager *memory_manager)
+{
+ Shape dims_shape{2};
+
+ std::vector<int32_t> dims_data = {2, 3};
+ std::vector<float> value_data = {5};
+
+ int32_t zero_point = 0;
+
+ if (DT == loco::DataType::S8)
+ zero_point = 1;
+
+ Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, memory_manager);
+ Tensor value = makeInputTensor<DT>(/*scalar*/ {}, /*scale*/ 0.25, /*zero_point*/ zero_point,
+ value_data, memory_manager);
+
+ Tensor output_tensor = makeOutputTensor(DT, /*scale*/ 0.25, /*zero_point*/ zero_point);
+
+ Fill kernel(&dims, &value, &output_tensor);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{5, 5, 5, 5, 5, 5};
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+
+ std::vector<int32_t> ref_output_shape{2, 3};
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FillTest, FillInt)
+{
+ // Run for int32_t input
+ runFillIntKernel<int32_t, loco::DataType::S32>(_memory_manager.get());
+ // Run for int64_t input
+ runFillIntKernel<int64_t, loco::DataType::S64>(_memory_manager.get());
+ // Run for int8_t input
+ runFillQuantIntKernel<loco::DataType::S8>(_memory_manager.get());
+ // Run for int16_t input
+ runFillQuantIntKernel<loco::DataType::S16>(_memory_manager.get());
+
+ SUCCEED();
+}
+
+TEST_F(FillTest, FillFloat)
+{
+ Shape dims_shape{3};
+
+ std::vector<int64_t> dims_data = {2, 2, 2};
+ std::vector<float> value_data = {5};
+
+ Tensor dims = makeInputTensor<loco::DataType::S64>(dims_shape, dims_data, _memory_manager.get());
+ Tensor value =
+ makeInputTensor<loco::DataType::FLOAT32>(/*scalar*/ {}, value_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+ Fill kernel(&dims, &value, &output_tensor);
+
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{5, 5, 5, 5, 5, 5, 5, 5};
+
+ std::vector<int32_t> ref_output_shape{2, 2, 2};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), ref_output_data);
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FillTest, Invalid_Input_Shape_NEG)
+{
+ Shape dims_shape{1, 3};
+
+ std::vector<int32_t> dims_data = {2, 2, 2};
+ std::vector<float> value_data = {5};
+
+ Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, _memory_manager.get());
+ Tensor value =
+ makeInputTensor<loco::DataType::FLOAT32>(/*scalar*/ {}, value_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+ Fill kernel(&dims, &value, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(FillTest, Invalid_Value_Shape_NEG)
+{
+ Shape dims_shape{3};
+
+ std::vector<int32_t> dims_data = {2, 2, 2};
+ std::vector<float> value_data = {5};
+
+ Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, _memory_manager.get());
+ Tensor value = makeInputTensor<loco::DataType::FLOAT32>({1}, value_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+ Fill kernel(&dims, &value, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/MirrorPad.cpp b/compiler/luci-interpreter/src/kernels/MirrorPad.cpp
index 2fbeefce4..bae1eac70 100644
--- a/compiler/luci-interpreter/src/kernels/MirrorPad.cpp
+++ b/compiler/luci-interpreter/src/kernels/MirrorPad.cpp
@@ -19,6 +19,8 @@
#include "kernels/Utils.h"
+#include <limits>
+
namespace luci_interpreter
{
namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/Pack.cpp b/compiler/luci-interpreter/src/kernels/Pack.cpp
index 6fee93890..42aab330c 100644
--- a/compiler/luci-interpreter/src/kernels/Pack.cpp
+++ b/compiler/luci-interpreter/src/kernels/Pack.cpp
@@ -76,9 +76,8 @@ void Pack::configure()
}
}
- if (t0->element_type() == DataType::S32 || t0->element_type() == DataType::U8 ||
- t0->element_type() == DataType::S8 || t0->element_type() == DataType::S16 ||
- t0->element_type() == DataType::S64)
+ if (t0->element_type() == DataType::U8 || t0->element_type() == DataType::S8 ||
+ t0->element_type() == DataType::S16)
{
LUCI_INTERPRETER_CHECK(output()->zero_point() == t0->zero_point());
LUCI_INTERPRETER_CHECK(output()->scale() == t0->scale());
diff --git a/compiler/luci-interpreter/src/kernels/Pack.test.cpp b/compiler/luci-interpreter/src/kernels/Pack.test.cpp
index 2404e4303..d16320b78 100644
--- a/compiler/luci-interpreter/src/kernels/Pack.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Pack.test.cpp
@@ -38,18 +38,26 @@ void Check(std::vector<std::initializer_list<int32_t>> input_shapes,
std::vector<Tensor> tmp_inputs;
for (int i = 0; i < input_datas.size(); i++)
{
- if (std::is_same<T, float>::value)
+ if (std::is_same<T, float>::value || std::is_same<T, int32_t>::value ||
+ std::is_same<T, int64_t>::value)
{
tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {}, ""));
memory_manager->allocate_memory(tmp_inputs[i]);
tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T));
}
- else
+ else if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value)
{
tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f / 255}, {128}}, ""));
memory_manager->allocate_memory(tmp_inputs[i]);
tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T));
}
+ else
+ {
+ assert((std::is_same<T, int16_t>::value) && "unexpected dtype is tested");
+ tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f}, {0}}, ""));
+ memory_manager->allocate_memory(tmp_inputs[i]);
+ tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T));
+ }
}
for (int i = 0; i < input_datas.size(); i++)
{
@@ -57,10 +65,14 @@ void Check(std::vector<std::initializer_list<int32_t>> input_shapes,
}
Tensor output_tensor = makeOutputTensor(element_type);
- if (!std::is_same<T, float>::value)
+ if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value)
{
output_tensor = makeOutputTensor(element_type, 1.0f / 255, 128);
}
+ else if (std::is_same<T, int16_t>::value)
+ {
+ output_tensor = makeOutputTensor(element_type, 1.0f, 0);
+ }
PackParams params{};
params.axis = axis;
@@ -79,7 +91,7 @@ template <typename T> class PackTest : public ::testing::Test
{
};
-using DataTypes = ::testing::Types<uint8_t, float>;
+using DataTypes = ::testing::Types<uint8_t, int8_t, int16_t, int32_t, int64_t, float>;
TYPED_TEST_SUITE(PackTest, DataTypes);
TYPED_TEST(PackTest, ThreeInputs)
diff --git a/compiler/luci-interpreter/src/kernels/Pad.cpp b/compiler/luci-interpreter/src/kernels/Pad.cpp
index fe172884b..c07f6e310 100644
--- a/compiler/luci-interpreter/src/kernels/Pad.cpp
+++ b/compiler/luci-interpreter/src/kernels/Pad.cpp
@@ -20,6 +20,8 @@
#include <tensorflow/lite/kernels/internal/reference/pad.h>
+#include <limits>
+
namespace luci_interpreter
{
namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/PadV2.cpp b/compiler/luci-interpreter/src/kernels/PadV2.cpp
index e90469239..197cdaa69 100644
--- a/compiler/luci-interpreter/src/kernels/PadV2.cpp
+++ b/compiler/luci-interpreter/src/kernels/PadV2.cpp
@@ -20,6 +20,8 @@
#include <tensorflow/lite/kernels/internal/reference/pad.h>
+#include <limits>
+
namespace luci_interpreter
{
namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/ReduceMax.cpp b/compiler/luci-interpreter/src/kernels/ReduceMax.cpp
new file mode 100644
index 000000000..d58cd1563
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ReduceMax.cpp
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReduceMax.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reduce.h>
+
+#include <stdexcept>
+#include <limits>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Returns the number of axes that will be reduced. Removes duplicates.
+static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
+{
+ int reduction_count = num_axes;
+ for (int i = 0; i < num_axes; ++i)
+ {
+ int current = axes_data[i] >= 0 ? axes_data[i] : axes_data[i] + input_num_dims;
+ assert(current >= 0 && current < input_num_dims);
+ for (int j = 0; j < i; j++)
+ {
+ int previous = axes_data[j] >= 0 ? axes_data[j] : axes_data[j] + input_num_dims;
+ // This checks for duplicate axis
+ if (current == previous)
+ {
+ --reduction_count;
+ break;
+ }
+ }
+ }
+ return reduction_count;
+}
+
+static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes,
+ bool keep_dims)
+{
+ int input_num_dims = input_shape.num_dims();
+ if (input_num_dims == 0)
+ {
+ return Shape(0);
+ }
+
+ if (keep_dims)
+ {
+ Shape output_shape(input_num_dims);
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ bool is_axis = false;
+ for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+ {
+ if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+ {
+ is_axis = true;
+ break;
+ }
+ }
+ if (is_axis)
+ {
+ output_shape.dim(idx) = 1;
+ }
+ else
+ {
+ output_shape.dim(idx) = input_shape.dim(idx);
+ }
+ }
+ return output_shape;
+ }
+ else
+ {
+ int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims);
+ Shape output_shape(input_num_dims - num_reduce_axes);
+ int num_skip_axes = 0;
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ bool is_axis = false;
+ for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+ {
+ if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+ {
+ ++num_skip_axes;
+ is_axis = true;
+ break;
+ }
+ }
+ if (!is_axis)
+ {
+ output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx);
+ }
+ }
+ return output_shape;
+ }
+}
+
+ReduceMax::ReduceMax(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+ Tensor *resolved_axes, const ReducerParams &params)
+ : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes}, params)
+{
+}
+
+void ReduceMax::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32);
+
+ const Shape &input_shape = input()->shape();
+ int input_num_dims = input_shape.num_dims();
+
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ int num_axes = axes()->shape().num_elements();
+ LUCI_INTERPRETER_CHECK(num_axes <= 4);
+
+ // We compute shapes of outputs in configure, assuming that outputs have
+ // static shape
+ // TODO Support dynamic shape
+ Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims);
+ output()->resize(output_shape);
+
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+
+ temp_index->resize(Shape(input_num_dims));
+ resolved_axes->resize(Shape(num_axes));
+}
+
+void ReduceMax::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ // TODO Support quantized kernels
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void ReduceMax::evalFloat() const
+{
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ int num_axes = axes()->shape().num_elements();
+
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+
+ int num_resolved_axis = 0;
+ LUCI_INTERPRETER_CHECK(
+ tflite::reference_ops::ResolveAxis(input()->shape().num_dims(), axes_data, num_axes,
+ getTensorData<int>(resolved_axes), &num_resolved_axis));
+
+ float init_value = std::numeric_limits<float>::lowest();
+ tflite::reference_ops::ReduceGeneric<float>(
+ getTensorData<float>(input()), getTensorShape(input()).DimsData(), input()->shape().num_dims(),
+ getTensorData<float>(output()), getTensorShape(output()).DimsData(),
+ output()->shape().num_dims(), axes_data, num_axes, _params.keep_dims,
+ getTensorData<int>(temp_index), getTensorData<int>(resolved_axes), init_value,
+ [](const float current, const float in) -> float { return (in > current) ? in : current; });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/ReduceMax.h b/compiler/luci-interpreter/src/kernels/ReduceMax.h
new file mode 100644
index 000000000..25a66278a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ReduceMax.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_REDUCE_MAX_H
+#define LUCI_INTERPRETER_KERNELS_REDUCE_MAX_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <memory>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ReduceMax : public KernelWithParams<ReducerParams>
+{
+public:
+ ReduceMax(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+ Tensor *resolved_axes, const ReducerParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *axes() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_REDUCE_MAX_H
diff --git a/compiler/luci-interpreter/src/kernels/ReduceMax.test.cpp b/compiler/luci-interpreter/src/kernels/ReduceMax.test.cpp
new file mode 100644
index 000000000..ab688827b
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ReduceMax.test.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReduceMax.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ReduceMaxTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(ReduceMaxTest, FloatNotKeepDims)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{1, 0, -3, -3};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = false;
+
+ ReduceMax kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{23, 24};
+ std::initializer_list<int32_t> ref_output_shape{2};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(ReduceMaxTest, FloatKeepDims)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{0, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ ReduceMax kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{20, 22, 24};
+ std::initializer_list<int32_t> ref_output_shape{1, 3, 1};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Shape.cpp b/compiler/luci-interpreter/src/kernels/Shape.cpp
new file mode 100644
index 000000000..0429fe1e5
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Shape.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Shape.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ShapeKernel::ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams &params)
+ : KernelWithParams<ShapeParams>({input}, {output}, params)
+{
+}
+
+void ShapeKernel::configure()
+{
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S32 or
+ output()->element_type() == DataType::S64);
+ const auto input_shape = input()->shape();
+
+ Shape output_shape(1);
+ output_shape.dim(0) = input_shape.num_dims();
+
+ output()->resize(output_shape);
+}
+
+void ShapeKernel::execute() const
+{
+ switch (params().out_type)
+ {
+ case DataType::S32:
+ evalInt<int32_t>();
+ break;
+ case DataType::S64:
+ evalInt<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> void ShapeKernel::evalInt() const
+{
+ const auto input_shape = input()->shape();
+
+ auto output_data = getTensorData<T>(output());
+
+ for (int i = 0; i < input_shape.num_dims(); ++i)
+ {
+ output_data[i] = input_shape.dim(i);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Shape.h b/compiler/luci-interpreter/src/kernels/Shape.h
new file mode 100644
index 000000000..cfaadec91
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Shape.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SHAPE_H
+#define LUCI_INTERPRETER_KERNELS_SHAPE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ShapeKernel : public KernelWithParams<ShapeParams>
+{
+public:
+ ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void evalInt() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SHAPE_H
diff --git a/compiler/luci-interpreter/src/kernels/Shape.test.cpp b/compiler/luci-interpreter/src/kernels/Shape.test.cpp
new file mode 100644
index 000000000..4763e016c
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Shape.test.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Shape.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ShapeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+template <typename T> void runShapeKernel(loco::DataType dataType, IMemoryManager *memory_manager)
+{
+ Shape input_shape{1, 3, 1, 3, 5};
+
+ Tensor input_tensor = Tensor(loco::DataType::FLOAT32, input_shape, {}, "");
+ Tensor output_tensor = makeOutputTensor(dataType);
+
+ ShapeParams params{};
+ params.out_type = dataType;
+
+ ShapeKernel kernel(&input_tensor, &output_tensor, params);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<T> ref_output_data{1, 3, 1, 3, 5};
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ref_output_data);
+
+ std::vector<int32_t> ref_output_shape{5};
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(ShapeTest, OutTypeInt)
+{
+
+ // Run for int32_t output
+ runShapeKernel<int32_t>(loco::DataType::S32, _memory_manager.get());
+ // Run for int64_t output
+ runShapeKernel<int64_t>(loco::DataType::S64, _memory_manager.get());
+
+ SUCCEED();
+}
+
+TEST_F(ShapeTest, Invalid_Output_Type_NEG)
+{
+ Shape input_shape{1, 3};
+
+ Tensor input_tensor = Tensor(loco::DataType::FLOAT32, input_shape, {}, "");
+ Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+ ShapeParams params{};
+ params.out_type = loco::DataType::FLOAT32;
+
+ ShapeKernel kernel(&input_tensor, &output_tensor, params);
+
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/SplitV.cpp b/compiler/luci-interpreter/src/kernels/SplitV.cpp
index 281988272..aa6820889 100644
--- a/compiler/luci-interpreter/src/kernels/SplitV.cpp
+++ b/compiler/luci-interpreter/src/kernels/SplitV.cpp
@@ -43,14 +43,36 @@ void SplitV::configure()
auto sizes_data = getTensorData<int32_t>(size_splits());
assert(size_splits()->shape().num_dims() == 1);
+
+ int32_t sum = 0;
+ const auto num_dims_size_spits = size_splits()->shape().dim(0);
+ int32_t count_neg_dim = 0;
+
+ for (int32_t i = 0; i < num_dims_size_spits - 1; ++i)
+ {
+ if (sizes_data[i] != -1)
+ {
+ sum += sizes_data[i];
+ }
+ else
+ {
+ count_neg_dim++;
+ }
+ }
+ assert(count_neg_dim < 2);
assert(size_splits()->shape().num_elements() == num_split);
- assert(std::accumulate(sizes_data, sizes_data + num_split, 0) ==
- input()->shape().dim(_axis_value));
auto output_shape = input()->shape();
for (int32_t i = 0; i < num_split; ++i)
{
- output_shape.dim(_axis_value) = sizes_data[i];
+ if (sizes_data[i] == -1)
+ {
+ output_shape.dim(_axis_value) = input()->shape().dim(_axis_value) - sum;
+ }
+ else
+ {
+ output_shape.dim(_axis_value) = sizes_data[i];
+ }
_outputs[i]->resize(output_shape);
}
}
diff --git a/compiler/luci-interpreter/src/kernels/StridedSlice.cpp b/compiler/luci-interpreter/src/kernels/StridedSlice.cpp
index c6452cdb0..a8730d861 100644
--- a/compiler/luci-interpreter/src/kernels/StridedSlice.cpp
+++ b/compiler/luci-interpreter/src/kernels/StridedSlice.cpp
@@ -136,6 +136,11 @@ void StridedSlice::execute() const
getTensorData<uint8_t>(input()), getTensorShape(output()),
getTensorData<uint8_t>(output()));
break;
+ case DataType::S32:
+ tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()),
+ getTensorData<int32_t>(input()), getTensorShape(output()),
+ getTensorData<int32_t>(output()));
+ break;
default:
throw std::runtime_error("Unsupported type.");
}
diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-interpreter/src/loader/GraphLoader.cpp
index dba39050c..40207090b 100644
--- a/compiler/luci-interpreter/src/loader/GraphLoader.cpp
+++ b/compiler/luci-interpreter/src/loader/GraphLoader.cpp
@@ -187,7 +187,7 @@ void GraphLoader::loadTensors()
const auto *node = loco::must_cast<const luci::CircleNode *>(_graph->nodes()->at(i));
if (node->opcode() == luci::CircleOpcode::CUSTOM && !isSupportedCustomNode(node))
- throw std::runtime_error("Unknown Custom Node, yet.");
+ throw std::runtime_error("Unsupported Custom operator. " + node->name());
if (!isTensorProducingNode(node))
continue;
diff --git a/compiler/luci-interpreter/src/loader/nodes/Add.cpp b/compiler/luci-interpreter/src/loader/nodes/Add.cpp
index decccaa1d..501e84752 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Add.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Add.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleAdd(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleAdd *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleAdd *>(circle_node);
assert(node->arity() == 2);
const Tensor *input1 = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp b/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp
index 0ee367748..f3ca55744 100644
--- a/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleArgMax(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleArgMax *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleArgMax *>(circle_node);
assert(node->arity() == 2);
const Tensor *input = helper.getInputTensor(node->input());
const Tensor *axis = helper.getInputTensor(node->dimension());
diff --git a/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
index efb011257..a8135706f 100644
--- a/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
@@ -25,9 +25,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleAveragePool2D(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleAveragePool2D *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleAveragePool2D *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->value());
diff --git a/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp b/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp
index aae3dbab1..9da2f6d93 100644
--- a/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp
@@ -25,9 +25,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleBatchMatMul(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleBatchMatMul *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleBatchMatMul *>(circle_node);
assert(node->arity() == 2);
const Tensor *lhs = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp b/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp
index 33d0e2db6..ac6ebb30f 100644
--- a/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleBatchToSpaceND(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleBatchToSpaceND *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleBatchToSpaceND *>(circle_node);
assert(node->arity() == 3);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Cast.cpp b/compiler/luci-interpreter/src/loader/nodes/Cast.cpp
index 21ea5ceab..a16354c96 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Cast.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Cast.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleCast(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleCast *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleCast *>(circle_node);
assert(node->arity() == 1);
diff --git a/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp b/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp
index 7823a9967..ba2564ea2 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleConcatenation(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleConcatenation *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleConcatenation *>(circle_node);
std::vector<const Tensor *> inputs(node->numValues());
for (uint32_t i = 0; i < node->numValues(); ++i)
{
diff --git a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
index b48d97d19..218165e20 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
@@ -25,9 +25,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleConv2D *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleConv2D *>(circle_node);
assert(node->arity() == 3);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp b/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp
index 0310fb23f..174946367 100644
--- a/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleDepthToSpace(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleDepthToSpace *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleDepthToSpace *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
index db26ecf2e..8af1e3b58 100644
--- a/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
@@ -25,9 +25,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleDepthwiseConv2D(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleDepthwiseConv2D *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleDepthwiseConv2D *>(circle_node);
assert(node->arity() == 3);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp b/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp
index 4aae56469..787322e9b 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleDequantize(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleDequantize *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleDequantize *>(circle_node);
const Tensor *input = helper.getInputTensor(node->input());
Tensor *output = helper.getOutputTensor(node);
diff --git a/compiler/luci-interpreter/src/loader/nodes/Div.cpp b/compiler/luci-interpreter/src/loader/nodes/Div.cpp
index 56c2e98f2..0611dfdab 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Div.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Div.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleDiv(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleDiv *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleDiv *>(circle_node);
assert(node->arity() == 2);
const Tensor *input1 = helper.getInputTensor(node->x());
const Tensor *input2 = helper.getInputTensor(node->y());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Elu.cpp b/compiler/luci-interpreter/src/loader/nodes/Elu.cpp
index 98ee78be7..a79985e3b 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Elu.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Elu.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleElu(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleElu *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleElu *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->features());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Equal.cpp b/compiler/luci-interpreter/src/loader/nodes/Equal.cpp
index 649d9bfe9..59692883f 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Equal.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Equal.cpp
@@ -25,9 +25,7 @@ std::unique_ptr<Kernel> build_kernel_CircleEqual(const luci::CircleNode *circle_
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleEqual *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleEqual *>(circle_node);
assert(node->arity() == 2);
const Tensor *x = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Exp.cpp b/compiler/luci-interpreter/src/loader/nodes/Exp.cpp
index 411d142c3..30d11cb89 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Exp.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Exp.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleExp(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleExp *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleExp *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Fill.cpp b/compiler/luci-interpreter/src/loader/nodes/Fill.cpp
new file mode 100644
index 000000000..3aefdf1c5
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Fill.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Fill.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFill(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleFill *>(circle_node);
+ assert(node->arity() == 2);
+
+ const auto dims = helper.getInputTensor(node->dims());
+ const auto value = helper.getInputTensor(node->value());
+ auto output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Fill>(dims, value, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Floor.cpp b/compiler/luci-interpreter/src/loader/nodes/Floor.cpp
index 6d8435f6c..e0a223116 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Floor.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Floor.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleFloor(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleFloor *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleFloor *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp b/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp
index cae2e186e..a45d89e38 100644
--- a/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleFloorDiv(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleFloorDiv *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleFloorDiv *>(circle_node);
assert(node->arity() == 2);
const Tensor *x = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp b/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp
index 0b8ac44bd..b7b742b8a 100644
--- a/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleFullyConnected(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleFullyConnected *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleFullyConnected *>(circle_node);
assert(node->arity() == 3);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Gather.cpp b/compiler/luci-interpreter/src/loader/nodes/Gather.cpp
index 9df9775c5..2ee2906e0 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Gather.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Gather.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleGather(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleGather *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleGather *>(circle_node);
assert(node->arity() == 2);
const Tensor *params = helper.getInputTensor(node->params());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Greater.cpp b/compiler/luci-interpreter/src/loader/nodes/Greater.cpp
index 3db11b840..80aa63cf0 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Greater.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Greater.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleGreater(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleGreater *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleGreater *>(circle_node);
assert(node->arity() == 2);
const Tensor *x = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp
index dbe051d67..272f2843b 100644
--- a/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleGreaterEqual(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleGreaterEqual *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleGreaterEqual *>(circle_node);
assert(node->arity() == 2);
const Tensor *x = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/If.cpp b/compiler/luci-interpreter/src/loader/nodes/If.cpp
index 5983f4d3b..3ac7d4941 100644
--- a/compiler/luci-interpreter/src/loader/nodes/If.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/If.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleIf(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleIf *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleIf *>(circle_node);
auto output_nodes = collectOutputNodes<luci::CircleIfOut>(node);
assert(node->arity() == 1 + node->input_count());
assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
diff --git a/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp b/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp
index 0a8fb85e2..06031e5bc 100644
--- a/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleInstanceNorm(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleInstanceNorm *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleInstanceNorm *>(circle_node);
assert(node->arity() == 3);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp b/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp
index 05f920266..6e22e6d4e 100644
--- a/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleL2Normalize(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleL2Normalize *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleL2Normalize *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp
index 0e70afafa..95b55896f 100644
--- a/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleL2Pool2D(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleL2Pool2D *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleL2Pool2D *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->value());
diff --git a/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp b/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp
index 7b229ad0e..bbf5067b1 100644
--- a/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleLeakyRelu(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleLeakyRelu *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleLeakyRelu *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->features());
Tensor *output = helper.getOutputTensor(node);
diff --git a/compiler/luci-interpreter/src/loader/nodes/Less.cpp b/compiler/luci-interpreter/src/loader/nodes/Less.cpp
index 81156f275..ae914ecc9 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Less.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Less.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleLess(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleLess *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleLess *>(circle_node);
assert(node->arity() == 2);
const Tensor *x = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp
index 82141e5ae..f1b424b55 100644
--- a/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleLessEqual(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleLessEqual *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleLessEqual *>(circle_node);
assert(node->arity() == 2);
const Tensor *x = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp b/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp
index a12dce0a0..962ca2d7c 100644
--- a/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp
@@ -25,9 +25,7 @@ std::unique_ptr<Kernel>
build_kernel_CircleLocalResponseNormalization(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleLocalResponseNormalization *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleLocalResponseNormalization *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->input());
Tensor *output = helper.getOutputTensor(node);
diff --git a/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp b/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp
index 6cf547aae..432204115 100644
--- a/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleLogSoftmax(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleLogSoftmax *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleLogSoftmax *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->logits());
diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp
index 2c9549f71..bf3cb671a 100644
--- a/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleLogicalAnd(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleLogicalAnd *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleLogicalAnd *>(circle_node);
assert(node->arity() == 2);
const Tensor *input1 = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp
index 3d327d6c4..fefcd9a06 100644
--- a/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleLogicalNot(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleLogicalNot *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleLogicalNot *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp
index 50566bb30..a416cb401 100644
--- a/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleLogicalOr(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleLogicalOr *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleLogicalOr *>(circle_node);
assert(node->arity() == 2);
const Tensor *input1 = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp b/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp
index e4160edb3..4a69deef1 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleLogistic(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleLogistic *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleLogistic *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp
index 914f22838..f66a206ca 100644
--- a/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleMaxPool2D(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleMaxPool2D *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleMaxPool2D *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->value());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp b/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp
index dc50d6773..d0bff776a 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleMaximum(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleMaximum *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleMaximum *>(circle_node);
assert(node->arity() == 2);
const Tensor *input1 = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Mean.cpp b/compiler/luci-interpreter/src/loader/nodes/Mean.cpp
index 97d91207f..0dec63e79 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Mean.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Mean.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleMean(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleMean *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleMean *>(circle_node);
assert(node->arity() == 2);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp b/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp
index ff659524a..1a49c1090 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleMinimum(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleMinimum *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleMinimum *>(circle_node);
assert(node->arity() == 2);
const Tensor *input1 = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp b/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp
index ebf294583..b221b4574 100644
--- a/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleMirrorPad(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleMirrorPad *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleMirrorPad *>(circle_node);
assert(node->arity() == 2);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Mul.cpp b/compiler/luci-interpreter/src/loader/nodes/Mul.cpp
index 4f9da967d..f9984853a 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Mul.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Mul.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleMul(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleMul *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleMul *>(circle_node);
assert(node->arity() == 2);
const Tensor *input1 = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Neg.cpp b/compiler/luci-interpreter/src/loader/nodes/Neg.cpp
index 23c00537b..9a9ecf991 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Neg.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Neg.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleNeg(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleNeg *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleNeg *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp
index 8e5711fc1..3916a5854 100644
--- a/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleNotEqual(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleNotEqual *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleNotEqual *>(circle_node);
assert(node->arity() == 2);
const Tensor *x = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp b/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp
index e31601bf6..f3d700c95 100644
--- a/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CirclePRelu(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CirclePRelu *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CirclePRelu *>(circle_node);
assert(node->arity() == 2);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Pack.cpp b/compiler/luci-interpreter/src/loader/nodes/Pack.cpp
index 699472081..efc5850e0 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Pack.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Pack.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CirclePack(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CirclePack *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CirclePack *>(circle_node);
assert(node->arity() == node->values_count());
std::vector<const Tensor *> inputs(node->values_count());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Pad.cpp b/compiler/luci-interpreter/src/loader/nodes/Pad.cpp
index 770549295..67ce997a7 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Pad.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Pad.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CirclePad(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CirclePad *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CirclePad *>(circle_node);
assert(node->arity() == 2);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp b/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp
index 12deb15f0..e378a972a 100644
--- a/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CirclePadV2(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CirclePadV2 *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CirclePadV2 *>(circle_node);
assert(node->arity() == 3);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Pow.cpp b/compiler/luci-interpreter/src/loader/nodes/Pow.cpp
index b430bc94f..d32fc3dbb 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Pow.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Pow.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CirclePow(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CirclePow *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CirclePow *>(circle_node);
assert(node->arity() == 2);
const Tensor *input1 = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp b/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp
index fd9836345..cb36fb6da 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp
@@ -24,9 +24,8 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleQuantize(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleQuantize *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleQuantize *>(circle_node);
+ assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->input());
Tensor *output = helper.getOutputTensor(node);
diff --git a/compiler/luci-interpreter/src/loader/nodes/ReduceMax.cpp b/compiler/luci-interpreter/src/loader/nodes/ReduceMax.cpp
new file mode 100644
index 000000000..1a8522dd6
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/ReduceMax.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ReduceMax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleReduceMax(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleReduceMax *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *axes = helper.getInputTensor(node->reduction_indices());
+ Tensor *output = helper.getOutputTensor(node);
+
+ auto temp_index_unique =
+ std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+ temp_index_unique->set_observable(false);
+ temp_index_unique->set_data_buffer(nullptr);
+ Tensor *temp_index =
+ helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_index_unique));
+
+ auto resolved_axes_unique =
+ std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+ resolved_axes_unique->set_observable(false);
+ resolved_axes_unique->set_data_buffer(nullptr);
+ Tensor *resolved_axes =
+ helper.getRuntimeGraph(node->graph())->addTensor(std::move(resolved_axes_unique));
+
+ ReducerParams params{};
+ params.keep_dims = node->keep_dims();
+
+ return std::make_unique<kernels::ReduceMax>(input, axes, output, temp_index, resolved_axes,
+ params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Relu.cpp b/compiler/luci-interpreter/src/loader/nodes/Relu.cpp
index d53a66a06..1d64c1c4e 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Relu.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Relu.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleRelu(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleRelu *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleRelu *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->features());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp b/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp
index f1b5d219b..e50cd2545 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleRelu6(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleRelu6 *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleRelu6 *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->features());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp b/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp
index 89e3ecebf..76ddd88a3 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleReshape(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleReshape *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleReshape *>(circle_node);
assert(node->arity() == 2);
const Tensor *input = helper.getInputTensor(node->tensor());
diff --git a/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp b/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp
index dca56588d..dc2b88ad3 100644
--- a/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleResizeBilinear(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleResizeBilinear *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleResizeBilinear *>(circle_node);
assert(node->arity() == 2);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp b/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp
index d1ea19c0f..c7058ae78 100644
--- a/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp
@@ -25,9 +25,7 @@ std::unique_ptr<Kernel>
build_kernel_CircleResizeNearestNeighbor(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleResizeNearestNeighbor *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleResizeNearestNeighbor *>(circle_node);
assert(node->arity() == 2);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp b/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp
index ea00f5408..c1a7f5350 100644
--- a/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleReverseV2(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleReverseV2 *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleReverseV2 *>(circle_node);
assert(node->arity() == 2);
const Tensor *input = helper.getInputTensor(node->tensor());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp b/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp
index ff87f435c..0714a5dba 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleRsqrt(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleRsqrt *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleRsqrt *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp b/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp
index 89528d5ee..d172ef438 100644
--- a/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp
@@ -24,9 +24,8 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSVDF(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSVDF *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSVDF *>(circle_node);
+ assert(node->arity() == 5);
const Tensor *input = helper.getInputTensor(node->input());
const Tensor *feature = helper.getInputTensor(node->weight_feature());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Shape.cpp b/compiler/luci-interpreter/src/loader/nodes/Shape.cpp
new file mode 100644
index 000000000..d1edbc794
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Shape.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Shape.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleShape(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleShape *>(circle_node);
+ assert(node->arity() == 1);
+
+ const auto input = helper.getInputTensor(node->input());
+ auto output = helper.getOutputTensor(node);
+
+ ShapeParams shape_params{};
+ shape_params.out_type = node->out_type();
+
+ return std::make_unique<kernels::ShapeKernel>(input, output, shape_params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Slice.cpp b/compiler/luci-interpreter/src/loader/nodes/Slice.cpp
index 741cd0806..60ac6417c 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Slice.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Slice.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSlice(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSlice *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSlice *>(circle_node);
assert(node->arity() == 3);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp b/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp
index b15e4b6f3..f41f63f6f 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSoftmax(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSoftmax *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSoftmax *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->logits());
diff --git a/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp b/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp
index 91c237aa5..b6e6cf516 100644
--- a/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSpaceToBatchND(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSpaceToBatchND *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSpaceToBatchND *>(circle_node);
assert(node->arity() == 3);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp b/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp
index 3cbbd9718..63fdb95ec 100644
--- a/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSpaceToDepth(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSpaceToDepth *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSpaceToDepth *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Split.cpp b/compiler/luci-interpreter/src/loader/nodes/Split.cpp
index 32553ad5e..3f6d4a7df 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Split.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Split.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSplit(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSplit *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSplit *>(circle_node);
auto output_nodes = collectOutputNodes<luci::CircleSplitOut>(node);
assert(node->arity() == 2);
assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
diff --git a/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp b/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp
index d78816447..0788822ca 100644
--- a/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSplitV(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSplitV *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSplitV *>(circle_node);
auto output_nodes = collectOutputNodes<luci::CircleSplitVOut>(node);
assert(node->arity() == 3);
assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
diff --git a/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp b/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp
index 56dd986f1..b9843fe0b 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSqrt(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSqrt *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSqrt *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Square.cpp b/compiler/luci-interpreter/src/loader/nodes/Square.cpp
index 43aadb969..0ad7c1772 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Square.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Square.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSquare(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSquare *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSquare *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp b/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp
index 6a2717aa2..e4c6fd851 100644
--- a/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSquaredDifference(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSquaredDifference *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSquaredDifference *>(circle_node);
assert(node->arity() == 2);
const Tensor *input1 = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp b/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp
index 583ff9314..6885f8077 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSqueeze(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSqueeze *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSqueeze *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp b/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp
index fe5fa7707..359b4e3e9 100644
--- a/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleStridedSlice(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleStridedSlice *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleStridedSlice *>(circle_node);
assert(node->arity() == 4);
const Tensor *input = helper.getInputTensor(node->input());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Sub.cpp b/compiler/luci-interpreter/src/loader/nodes/Sub.cpp
index bad4fbb13..a6252cb53 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Sub.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Sub.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleSub(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleSub *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleSub *>(circle_node);
assert(node->arity() == 2);
const Tensor *input1 = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp b/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp
index f4255291b..a58ef60a8 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleTanh(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleTanh *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleTanh *>(circle_node);
assert(node->arity() == 1);
const Tensor *input = helper.getInputTensor(node->x());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp b/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp
index 4e095fbbc..ea17d8311 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleTranspose(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleTranspose *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleTranspose *>(circle_node);
assert(node->arity() == 2);
const Tensor *input = helper.getInputTensor(node->a());
diff --git a/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp b/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp
index 1b954c35c..d773e301e 100644
--- a/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleTransposeConv(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleTransposeConv *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleTransposeConv *>(circle_node);
assert(node->arity() == 4);
const Tensor *input_sizes = helper.getInputTensor(node->inputSizes());
diff --git a/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp b/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp
index 978c738c6..a1c0d323a 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleUnpack(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleUnpack *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleUnpack *>(circle_node);
auto output_nodes = collectOutputNodes<luci::CircleUnpackOut>(node);
assert(node->arity() == 1);
assert(output_nodes.size() == static_cast<size_t>(node->num()));
diff --git a/compiler/luci-interpreter/src/loader/nodes/While.cpp b/compiler/luci-interpreter/src/loader/nodes/While.cpp
index 284dc0c68..8fde6ec8a 100644
--- a/compiler/luci-interpreter/src/loader/nodes/While.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/While.cpp
@@ -24,9 +24,7 @@ namespace luci_interpreter
std::unique_ptr<Kernel> build_kernel_CircleWhile(const luci::CircleNode *circle_node,
KernelBuilderHelper &helper)
{
- const auto *node = dynamic_cast<const luci::CircleWhile *>(circle_node);
- if (node == nullptr)
- throw std::runtime_error("wrong builder for operation");
+ const auto *node = loco::must_cast<const luci::CircleWhile *>(circle_node);
auto output_nodes = collectOutputNodes<luci::CircleWhileOut>(node);
assert(node->arity() == node->input_count());
diff --git a/compiler/luci-micro/CMakeLists.txt b/compiler/luci-micro/CMakeLists.txt
index c8a2e12e1..642cf14a3 100644
--- a/compiler/luci-micro/CMakeLists.txt
+++ b/compiler/luci-micro/CMakeLists.txt
@@ -15,7 +15,7 @@ set(CMAKE_ARM_OPTIONS
-DLUCI_STATIC=ON
-DBUILD_CMSIS_NN_FUNCTIONS=ON
-DTARGET_CPU=cortex-m7
- "-DCMAKE_TOOLCHAIN_FILE=${NNAS_PROJECT_SOURCE_DIR}/infra/nncc/cmake/buildtool/config/arm-non-eabi-gcc.cmake"
+ "-DCMAKE_TOOLCHAIN_FILE=${NNAS_PROJECT_SOURCE_DIR}/infra/nncc/cmake/buildtool/config/arm-none-eabi-gcc.cmake"
"-DLUCI_INTERPRETER_PAL_DIR=${CMAKE_CURRENT_SOURCE_DIR}/../luci-interpreter/pal/mcu"
"-DNNAS_PROJECT_SOURCE_DIR=${NNAS_PROJECT_SOURCE_DIR}"
"-DNNAS_EXTERNALS_DIR=${NNAS_EXTERNALS_DIR}"
diff --git a/compiler/luci-micro/luci-interpreter/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/CMakeLists.txt
new file mode 100644
index 000000000..1f7acee87
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(LUCI_INTERPRETER_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include")
+set(LUCI_INTERPRETER_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src")
+if (NOT LUCI_INTERPRETER_PAL_DIR)
+ set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/pal/linux")
+endif()
+
+set(KERNEL_REGISTER_FILE ${LUCI_INTERPRETER_PAL_DIR}/KernelsToBuild.lst)
+
+if (NOT DEFINED CUSTOM_LUCI_INTERPRETER_SUFFIX)
+ set(LUCI_INTERPRETER_SUFFIX "")
+else()
+ set(LUCI_INTERPRETER_SUFFIX ${CUSTOM_LUCI_INTERPRETER_SUFFIX})
+endif()
+
+add_subdirectory(src)
diff --git a/compiler/luci-micro/luci-interpreter/README.md b/compiler/luci-micro/luci-interpreter/README.md
new file mode 100644
index 000000000..77ec5c81c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/README.md
@@ -0,0 +1,158 @@
+# luci-interpreter
+
+`luci-interpreter` is an inference engine for neural networks represented in luci IR.
+See `compiler/luci/lang` directory for details about IR.
+You can find useful infrastructure, like importer/exporter, optimizations in `compiler/luci`.
+
+`luci-interpreter` provides:
+- Basic inference functionality, input setters and output getters
+- Interface for inspecting hidden interpreter state, like activation values during inference
+- Customization mechanisms to fit the interpreter to specific platforms, like MCUs
+
+Public interface headers are placed in `luci-interpreter/include/luci_interpreter` directory
+
+## Basic usage
+
+Minimal usage includes:
+- Setting input data
+- Running inference
+- Fetching inference results
+
+Interpreter object is reusable and can run multiple inferences.
+Elements in tensors (input/output/internal) are stored contiguously and have C-like layout:
+This means for tensor t=[[0, 1],[2, 3]], t[0,1] == 1.
+
+Input and output tensors have the same indexes as in original luci model.
+
+**Usage example:**
+``` c++
+// Note getTensorSize is a function that computes tensor size,
+// it is not part of interpreter and should be implemented by user
+
+luci_interpreter::Interpreter interpreter(luci_module);
+
+// Set inputs
+// assuming model has only one input and one output
+const auto input_nodes = loco::input_nodes(module->graph());
+
+const auto *input_node = dynamic_cast<const luci::CircleInput *>(input_nodes[0]);
+std::vector<char> input_data(getTensorSize(input_node));
+// Initialize input data here
+
+interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+
+// Start inference
+interpreter.interpret();
+
+// Fetch inference results
+const auto output_nodes = loco::output_nodes(module->graph());
+const auto *output_node = dynamic_cast<const luci::CircleOutput *>(output_nodes[0]);
+std::vector<char> output_data(getTensorSize(output_node));
+interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
+```
+
+## Inspecting intermediate state
+
+Interpreter provides interfaces to investigate internal state of interpreter during inference.
+
+This is done by "observer" mechanism:
+- `Interpreter` class has `attachObserver` method, which takes pointer to `ExecutionObserver` object
+- `ExecutionObserver` defines several callback methods the user can override to inject custom code
+
+ExecutionObserver provides three callbacks:
+- `postTensorWrite` checks contents of output tensor after operation execution
+- `preOperatorExecute` notifies that interpreter is going to execute operation
+- `postOperatorExecute` notifies that interpreter has finished execution of an operation
+
+See `luci-interpreter/include/luci_interpreter/Interpreter.h` for this interface details.
+
+**Usage example:**
+``` c++
+class CustomExecutionObserver: public luci_interpreter::ExecutionObserver
+{
+public:
+ void postTensorWrite(const luci::CircleNode *node, const Tensor *tensor) override
+ {
+ if (tensor->element_type() != loco::DataType::FLOAT32)
+ return;
+ for (int i = 0; i < tensor->shape().num_elements(); ++i)
+ std::cout << tensor->data<float>()[i] << ", ";
+ }
+
+ // User observer can override only needed methods,
+ // others will inherit empty implementation from base observer.
+
+ // void preOperatorExecute(const luci::CircleNode *node);
+ // void postOperatorExecute(const luci::CircleNode *node);
+};
+
+luci_interpreter::Interpreter interpreter(module);
+CustomExecutionObserver observer;
+interpreter.attachObserver(&observer);
+
+// initialize input_data
+interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+
+interpreter.interpret();
+```
+
+## Customizing inference
+
+### Memory manager
+
+Interpreter provides a handle for altering default memory management mechanisms.
+
+This is done by the `MemoryManager` interface, see `luci-interpreter/include/luci_interpreter/MemoryManager.h` for implementation details.
+
+This header contains the `IMemoryManager` abstract class, which is responsible for allocation and deallocation of tensors' memory.
+
+User can construct an interpreter with one of predefined memory managers or their own custom memory manager.
+Note that one memory manager could be shared between multiple interpreter instances, because an interpreter does not own the manager object.
+
+List of predefined memory managers:
+- `SimpleMemoryManager` A simple wrapper around new/delete; this is the default.
+- `TestMemoryManager` Memorizes all allocated memory and releases it in Manager destructor, used in kernel unit tests.
+- `BuddyMemoryManager` Implements Buddy algorithm, uses external buffer for tensor data allocations, does not need new/delete.
+- `StaticMemoryManager` Uses a precomputed memory allocation plan. Requires preparation with MemoryPlanner, but can reduce memory consumption in restricted environments (like MCUs).
+
+**SimpleMemoryManager usage example:**
+
+No need to select anything to use this memory manager.
+``` c++
+luci_interpreter::Interpreter interpreter(module);
+```
+
+**TestMemoryManager usage example:**
+
+``` c++
+luci_interpreter::TestMemoryManager mm;
+luci_interpreter::Interpreter interpreter(module, &mm);
+```
+
+**BuddyMemoryManager usage example:**
+
+`BuddyMemoryManager` implements a classic allocation algorithm: https://en.wikipedia.org/wiki/Buddy_memory_allocation.
+
+This allocator uses an external buffer as a memory pool. That allows using static memory arrays for allocations.
+
+Limitations
+- Current implementation uses only lower power-of-two bytes of given buffer.
+
+ For example for 1000 bytes buffer, only lower 512 bytes will be used.
+- Current implementation can handle maximum 4 gigabyte memory pool
+
+``` c++
+ constexpr int buffer_size = 2048;
+ static uint8_t buffer[buffer_size];
+ luci_interpreter::BuddyMemoryManager memory_manager(buffer, buffer_size);
+ luci_interpreter::Interpreter interpreter(module.get(), &memory_manager);
+```
+
+**StaticMemoryManager usage example:**
+``` c++
+TBD when it is merged
+```
+
+## Further reading
+
+If you want to participate in development, please read `DEVELOPER.md` for SW architecture details.
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h
new file mode 100644
index 000000000..205baa626
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h
@@ -0,0 +1,144 @@
+/* Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/MemoryManager.h"
+
+#ifndef LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
+
+namespace luci_interpreter
+{
+
+class BuddyMemoryManager : public IMemoryManager
+{
+public:
+ BuddyMemoryManager(uint8_t *memory_start, int32_t memSize);
+
+ void allocate_memory(luci_interpreter::Tensor &tensor) final;
+ void release_memory(luci_interpreter::Tensor &tensor) final;
+
+private:
+ struct Block
+ {
+ Block *next_free;
+ bool is_free;
+ uint32_t size;
+ // debug field
+ Block *self;
+ };
+
+ Block *_start_block;
+ int32_t _num_blocks;
+ uint32_t _size;
+ Block *_free_blocks[32]{};
+
+ static int32_t lowerLog2(uint32_t val)
+ {
+ int32_t i = 0;
+ while (val >>= 1)
+ i++;
+
+ return i;
+ }
+
+ void addToBlocks(Block *block, int32_t l)
+ {
+ if (!block)
+ return;
+
+ block->next_free = _free_blocks[l];
+ _free_blocks[l] = block;
+ }
+
+ void removeFromBlocks(const Block *block, int32_t l)
+ {
+ if (!block)
+ return;
+
+ Block *tmp = _free_blocks[l];
+
+ if (block == tmp)
+ {
+ _free_blocks[l] = block->next_free;
+ return;
+ }
+
+ while (tmp)
+ {
+ if (tmp->next_free == block)
+ {
+ tmp->next_free = block->next_free;
+ return;
+ }
+
+ tmp = tmp->next_free;
+ }
+ }
+
+ void divideBlock(Block *block, int32_t l)
+ {
+ int32_t size = ((block->size + sizeof(Block)) / 2) - sizeof(Block);
+
+ removeFromBlocks(block, l);
+
+ // there is no need to add to the free_blocks list here
+ block->is_free = true;
+ block->size = size;
+ block->self = block;
+
+ Block *buddy;
+ buddy = (Block *)((uint8_t *)block + sizeof(Block) + size);
+ buddy->is_free = true;
+ buddy->size = size;
+ buddy->self = buddy;
+
+ addToBlocks(buddy, l - 1);
+ }
+
+ Block *mergeBlock(Block *block)
+ {
+ Block *buddy;
+
+ const int32_t l = lowerLog2(block->size + sizeof(Block));
+
+ const int64_t address = ((uint8_t *)block - (uint8_t *)_start_block);
+ buddy = (Block *)((address ^ (1 << l)) + (uint8_t *)_start_block);
+
+ if (!buddy->is_free || buddy->size != block->size)
+ return nullptr;
+
+ if (block > buddy)
+ {
+ Block *x = block;
+ block = buddy;
+ buddy = x;
+ }
+
+ removeFromBlocks(block, l);
+ removeFromBlocks(buddy, l);
+
+ block->size = block->size * 2 + sizeof(Block);
+ block->is_free = true;
+ block->self = block;
+
+ addToBlocks(block, l + 1);
+
+ return block;
+ }
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h
new file mode 100644
index 000000000..375b1ae20
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
+#define __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
+
+#include <luci/Import/GraphBuilderRegistry.h>
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Creates and returns GraphBuilderSource, which allows to not copy constant buffers from
+ * model's file.
+ *
+ * @warning Use this source only in case when model's buffer alive longer than Interpreter.
+ */
+std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying();
+
+} // namespace luci_interpreter
+
+#endif // __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/Interpreter.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/Interpreter.h
new file mode 100644
index 000000000..8e2f457a5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/Interpreter.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_INTERPRETER_H
+#define LUCI_INTERPRETER_INTERPRETER_H
+
+#include "luci_interpreter/core/Tensor.h"
+
+#include <luci/IR/Nodes/CircleInput.h>
+#include <luci/IR/Nodes/CircleOutput.h>
+
+#include "luci_interpreter/MemoryManager.h"
+#include <luci/IR/Module.h>
+
+#include <memory>
+#include <vector>
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class ExecutionObserver
+{
+public:
+ virtual ~ExecutionObserver();
+
+ // Called when the value of a tensor has been updated during execution.
+ virtual void postTensorWrite(const luci::CircleNode *node, const Tensor *tensor);
+
+ // Called before / after executing an operator.
+ // Note that these methods are not called for auxiliary operators (CircleInput, CircleOutput,
+ // CircleConst and Circle*Out).
+ virtual void preOperatorExecute(const luci::CircleNode *node);
+ virtual void postOperatorExecute(const luci::CircleNode *node);
+};
+
+class Interpreter
+{
+public:
+ explicit Interpreter(const luci::Module *module);
+
+ explicit Interpreter(const luci::Module *module, IMemoryManager *memory_manager);
+
+ ~Interpreter();
+
+ void writeInputTensor(const luci::CircleInput *input_node, const void *data, size_t data_size);
+
+ void readOutputTensor(const luci::CircleOutput *output_node, void *data, size_t data_size);
+
+ void interpret();
+
+ void attachObserver(ExecutionObserver *observer);
+
+ const Tensor *getTensor(const loco::Node *node) { return _node_to_tensor[node]; }
+
+private:
+ // _default_memory_manager should be before _runtime_module due to
+ // the order of deletion in the destructor
+ std::unique_ptr<IMemoryManager> _default_memory_manager = nullptr;
+ std::unique_ptr<class RuntimeModule> _runtime_module;
+
+ // Observer functionality support.
+ std::unique_ptr<struct RuntimeToIR> _runtime_to_ir;
+ std::unordered_map<const loco::Node *, Tensor *> _node_to_tensor;
+ std::unique_ptr<class EventNotifier> _event_notifier;
+ std::vector<ExecutionObserver *> _observers;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_INTERPRETER_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/MemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/MemoryManager.h
new file mode 100644
index 000000000..f32c52095
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/MemoryManager.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_MEMORY_MANAGER_H
+
+#include "luci_interpreter/core/DataType.h"
+#include "luci_interpreter/core/Tensor.h"
+
+namespace luci_interpreter
+{
+
+class IMemoryManager
+{
+public:
+ virtual void allocate_memory(luci_interpreter::Tensor &tensor) = 0;
+ virtual void release_memory(luci_interpreter::Tensor &tensor) = 0;
+
+ virtual ~IMemoryManager() = default;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_MEMORY_MANAGER_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h
new file mode 100644
index 000000000..658a1c609
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
+
+#include "luci_interpreter/MemoryManager.h"
+
+namespace luci_interpreter
+{
+
+class SimpleMemoryManager : public IMemoryManager
+{
+public:
+ void allocate_memory(luci_interpreter::Tensor &tensor) final;
+ void release_memory(luci_interpreter::Tensor &tensor) final;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h
new file mode 100644
index 000000000..ded7bde79
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
+
+#include "luci_interpreter/MemoryManager.h"
+
+namespace luci_interpreter
+{
+
+// Used for allocations in static buffer, using offsets defined in luci model.
+class StaticMemoryManager : public IMemoryManager
+{
+public:
+ StaticMemoryManager() = delete;
+
+ explicit StaticMemoryManager(uint8_t *buffer_ptr) : _buffer_ptr(buffer_ptr)
+ { /* Do nothing */
+ }
+
+ void allocate_memory(luci_interpreter::Tensor &tensor) final;
+ void release_memory(luci_interpreter::Tensor &tensor) final;
+
+private:
+ // Stores a pointer to the beginning of the allocated memory buffer.
+ uint8_t *_buffer_ptr;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/TestMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/TestMemoryManager.h
new file mode 100644
index 000000000..397bbed76
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/TestMemoryManager.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
+
+#include "luci_interpreter/MemoryManager.h"
+
+namespace luci_interpreter
+{
+// Memory Manager for use in kernel tests. It eliminates the need to manually delete the
+// allocated memory in tests: this memory manager remembers all of its allocations and
+// its destructor releases all of them.
+class TestMemoryManager : public IMemoryManager
+{
+public:
+ void allocate_memory(luci_interpreter::Tensor &tensor) final;
+ void release_memory(luci_interpreter::Tensor &tensor) final;
+
+ ~TestMemoryManager() override
+ {
+ for (auto allocation : allocations)
+ {
+ delete[] allocation;
+ }
+ }
+
+private:
+ std::vector<uint8_t *> allocations;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/DataType.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/DataType.h
new file mode 100644
index 000000000..27bf719b5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/DataType.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_DATATYPE_H
+#define LUCI_INTERPRETER_CORE_DATATYPE_H
+
+#include <loco/IR/DataType.h>
+#include <loco/IR/DataTypeTraits.h>
+
+#include <cstddef>
+
+namespace luci_interpreter
+{
+
+using DataType = loco::DataType;
+
+template <DataType DT> using DataTypeImpl = loco::DataTypeImpl<DT>;
+
+inline size_t getDataTypeSize(DataType data_type) { return loco::size(data_type); }
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_DATATYPE_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h
new file mode 100644
index 000000000..bb9ff6d4a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_TENSOR_H
+#define LUCI_INTERPRETER_CORE_TENSOR_H
+
+#include "luci_interpreter/core/DataType.h"
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+class Shape
+{
+public:
+ explicit Shape(int rank) : _dims(rank, 0) {}
+
+ Shape(std::initializer_list<int32_t> dims) : _dims(dims.begin(), dims.end()) {}
+
+ int num_dims() const { return _dims.size(); }
+
+ int32_t dim(int i) const
+ {
+ assert(i >= 0 && i < static_cast<int>(_dims.size()));
+ return _dims[i];
+ }
+
+ int32_t &dim(int i)
+ {
+ assert(i >= 0 && i < static_cast<int>(_dims.size()));
+ return _dims[i];
+ }
+
+ int32_t num_elements() const
+ {
+ int32_t result = 1;
+ for (const int32_t dim : _dims)
+ {
+ result *= dim;
+ }
+ return result;
+ }
+
+ bool operator==(const Shape &other) const { return _dims == other._dims; }
+
+ bool operator!=(const Shape &other) const { return !operator==(other); }
+
+private:
+ std::vector<int32_t> _dims;
+};
+
+// Tensor affine quantization parameters.
+//
+// The relationship between real and quantized values:
+// real_value = (quantized_value - zero_point) * scale
+//
+// In per-tensor case, 'scale' and 'zero_point' are one element each.
+// In per-channel case, 'scale' and 'zero_point' are N elements each, where N is the size
+// of the quantized dimension.
+//
+// Note that due to historical and performance reasons, per-tensor quantization uses unsigned
+// integer types, while per-channel uses signed types assuming 'zero_point' == 0.
+struct AffineQuantization
+{
+ std::vector<float> scale;
+ std::vector<int32_t> zero_point;
+ int32_t quantized_dimension;
+};
+
+class Tensor
+{
+public:
+ Tensor(DataType element_type, Shape shape, AffineQuantization quantization, std::string name);
+
+ DataType element_type() const { return _element_type; }
+
+ const Shape &shape() const { return _shape; }
+
+ float scale() const
+ {
+ assert(_quantization.scale.size() == 1);
+ return _quantization.scale[0];
+ }
+
+ int32_t zero_point() const
+ {
+ assert(_quantization.zero_point.size() == 1);
+ return _quantization.zero_point[0];
+ }
+
+ const std::vector<float> &scales() const { return _quantization.scale; }
+
+ const std::vector<int32_t> &zero_points() const { return _quantization.zero_point; }
+
+ int32_t quantized_dimension() const { return _quantization.quantized_dimension; }
+
+ template <typename T> const T *data() const
+ {
+ static_assert(std::is_same<uint8_t, char>::value or
+ std::is_same<uint8_t, unsigned char>::value);
+ return reinterpret_cast<const T *>(_data);
+ }
+
+ template <typename T> T *data()
+ {
+ static_assert(std::is_same<uint8_t, char>::value or
+ std::is_same<uint8_t, unsigned char>::value);
+ return reinterpret_cast<T *>(_data);
+ }
+
+ const std::string &name() const { return _name; }
+
+ void readData(void *data_ptr, size_t data_size) const;
+
+ void writeData(const void *data_ptr, size_t data_size);
+
+ void resize(const Shape &new_shape);
+
+ void set_data_buffer(uint8_t *buffer)
+ {
+ if (buffer == nullptr)
+ {
+ _data_allocated = false;
+ }
+ else
+ {
+ _data_allocated = true;
+ }
+ _data = buffer;
+ }
+
+ bool is_observable() const { return _is_observable; }
+
+ void set_observable(bool value) { _is_observable = value; }
+
+ bool is_allocatable() const { return _is_allocatable; }
+
+ void set_allocatable(bool value) { _is_allocatable = value; }
+
+ bool is_data_allocated() const { return _data_allocated; }
+
+ int32_t get_offset() const { return _offset; }
+
+ void set_offset(int32_t offset) { _offset = offset; }
+
+private:
+ DataType _element_type;
+ Shape _shape;
+ AffineQuantization _quantization;
+ uint8_t *_data;
+ std::string _name;
+ bool _data_allocated;
+ // Write of tensor is reported to registered Observers only if this tensor is observable
+ // This is needed for tensors used in kernel implementation, but not present in original model.
+ bool _is_observable = true;
+ // Memory manager is called for tensor only if it is "allocatable".
+ // Kernel configuration could disable allocation of some tensors if they are not needed for
+ // particular operation.
+ bool _is_allocatable = true;
+ // Used by static memory manager.
+ // Stores the offset from the beginning of the allocated memory buffer.
+ int32_t _offset = -1;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_TENSOR_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
new file mode 100644
index 000000000..f0df58db3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
@@ -0,0 +1,62 @@
+REGISTER_KERNEL(Add)
+REGISTER_KERNEL(ArgMax)
+REGISTER_KERNEL(AveragePool2D)
+REGISTER_KERNEL(BatchToSpaceND)
+REGISTER_KERNEL(Cast)
+REGISTER_KERNEL(Concatenation)
+REGISTER_KERNEL(Conv2D)
+REGISTER_KERNEL(DepthToSpace)
+REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Dequantize)
+REGISTER_KERNEL(Div)
+REGISTER_KERNEL(Elu)
+REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(ExpandDims)
+REGISTER_KERNEL(Fill)
+REGISTER_KERNEL(Floor)
+REGISTER_KERNEL(FloorDiv)
+REGISTER_KERNEL(Equal)
+REGISTER_KERNEL(FullyConnected)
+REGISTER_KERNEL(Greater)
+REGISTER_KERNEL(GreaterEqual)
+REGISTER_KERNEL(If)
+REGISTER_KERNEL(InstanceNorm)
+REGISTER_KERNEL(L2Normalize)
+REGISTER_KERNEL(L2Pool2D)
+REGISTER_KERNEL(LeakyRelu)
+REGISTER_KERNEL(Less)
+REGISTER_KERNEL(LessEqual)
+REGISTER_KERNEL(LogicalAnd)
+REGISTER_KERNEL(LogicalNot)
+REGISTER_KERNEL(LogicalOr)
+REGISTER_KERNEL(Logistic)
+REGISTER_KERNEL(Maximum)
+REGISTER_KERNEL(MaxPool2D)
+REGISTER_KERNEL(Minimum)
+REGISTER_KERNEL(MirrorPad)
+REGISTER_KERNEL(Mul)
+REGISTER_KERNEL(Neg)
+REGISTER_KERNEL(NotEqual)
+REGISTER_KERNEL(Pad)
+REGISTER_KERNEL(PadV2)
+REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Quantize)
+REGISTER_KERNEL(Reshape)
+REGISTER_KERNEL(ResizeBilinear)
+REGISTER_KERNEL(ResizeNearestNeighbor)
+REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Shape)
+REGISTER_KERNEL(Softmax)
+REGISTER_KERNEL(SpaceToBatchND)
+REGISTER_KERNEL(SpaceToDepth)
+REGISTER_KERNEL(StridedSlice)
+REGISTER_KERNEL(Sqrt)
+REGISTER_KERNEL(Square)
+REGISTER_KERNEL(SquaredDifference)
+REGISTER_KERNEL(Squeeze)
+REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(SVDF)
+REGISTER_KERNEL(Tanh)
+REGISTER_KERNEL(Transpose)
+REGISTER_KERNEL(TransposeConv)
+REGISTER_KERNEL(While)
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALArgMax.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALArgMax.h
new file mode 100644
index 000000000..21e63296d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALArgMax.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H
+#define LUCI_INTERPRETER_PAL_ARGMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/arg_min_max.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T1, typename T2, typename T3>
+static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data,
+ const T2 *axis, const tflite::RuntimeShape &output_shape,
+ T3 *output_data, const std::greater<T1> cmp)
+{
+ tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ARGMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h
new file mode 100644
index 000000000..a274afb7e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void AveragePool(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data)
+{
+ {
+ // MARK: At this moment this operation is not supported
+ assert(false && "AveragePool NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void AveragePool<int8_t>(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape,
+ int8_t *scratchpad_data)
+{
+ assert(input_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ assert(scratchpad_data != nullptr);
+
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ assert(batches == 1);
+
+ const int depth = tflite::MatchingDim(input_shape, 3, output_shape, 3);
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = 1;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = 1;
+ output_dims.h = output_shape.Dims(1);
+ output_dims.w = output_shape.Dims(2);
+ output_dims.c = depth;
+
+ cmsis_nn_pool_params pool_params;
+ pool_params.stride.h = params.stride_height;
+ pool_params.stride.w = params.stride_width;
+ pool_params.padding.h = params.padding_values.height;
+ pool_params.padding.w = params.padding_values.width;
+ pool_params.activation.min = params.quantized_activation_min;
+ pool_params.activation.max = params.quantized_activation_max;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = 1;
+ filter_dims.h = params.filter_height;
+ filter_dims.w = params.filter_width;
+ filter_dims.c = 1;
+
+ cmsis_nn_context ctx;
+ ctx.buf = scratchpad_data;
+ ctx.size = scratchpad_shape.Dims(0);
+ auto res = arm_avgpool_s8(&ctx, &pool_params, &input_dims, input_data, &filter_dims, &output_dims,
+ output_data);
+ assert(res == ARM_MATH_SUCCESS);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &output_shape)
+
+{
+ if (input_data_type == luci_interpreter::DataType::S8)
+ {
+ assert(input_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+ const int32_t output_width = output_shape.Dims(2);
+ const int32_t depth = tflite::MatchingDim(input_shape, 3, output_shape, 3);
+
+ const int32_t buf_size = arm_avgpool_s8_get_buffer_size(output_width, depth);
+ auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+
+ luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size};
+ scratchpad->resize(scratchpad_shape);
+ }
+ else
+ {
+ scratchpad->set_allocatable(false);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h
new file mode 100644
index 000000000..4dd77ffdc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+
+#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::BatchToSpaceND(
+ unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, crops_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h
new file mode 100644
index 000000000..cfb84ea60
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/conv.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &filter_shape,
+ const float *filter_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, const tflite::RuntimeShape &output_shape,
+ float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ float *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data,
+ tflite::RuntimeShape(), nullptr);
+}
+
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ uint8 *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+ scratchpad_data, nullptr);
+}
+
+static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
+ const int32_t *shifts, const tflite::RuntimeShape &input_shape,
+ const int8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ int8 *scratchpad_data)
+{
+ if (scratchpad_data)
+ {
+ cmsis_nn_conv_params conv_params;
+ conv_params.dilation.h = params.dilation_height_factor;
+ conv_params.dilation.w = params.dilation_width_factor;
+
+ assert(conv_params.dilation.h == 1);
+ assert(conv_params.dilation.w == 1);
+
+ conv_params.input_offset = params.input_offset;
+ conv_params.output_offset = params.output_offset;
+ conv_params.stride.h = params.stride_height;
+ conv_params.stride.w = params.stride_width;
+ conv_params.padding.h = params.padding_values.height;
+ conv_params.padding.w = params.padding_values.width;
+ conv_params.activation.min = params.quantized_activation_min;
+ conv_params.activation.max = params.quantized_activation_max;
+
+ cmsis_nn_per_channel_quant_params quant_params;
+ quant_params.multiplier = const_cast<int32_t *>(mult);
+ quant_params.shift = const_cast<int32_t *>(shifts);
+
+ assert(conv_params.activation.min <= conv_params.activation.max);
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
+ if (bias_data)
+ {
+ assert(bias_shape.FlatSize() == output_depth);
+ }
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batch_size;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = input_depth;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = output_depth;
+ filter_dims.h = filter_shape.Dims(1);
+ filter_dims.w = filter_shape.Dims(2);
+ filter_dims.c = input_depth;
+
+ cmsis_nn_dims bias_dims;
+ bias_dims.n = 1;
+ bias_dims.h = 1;
+ bias_dims.w = 1;
+ bias_dims.c = output_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batch_size;
+ output_dims.h = output_shape.Dims(1);
+ output_dims.w = output_shape.Dims(2);
+ output_dims.c = output_depth;
+
+ cmsis_nn_context ctx;
+ ctx.buf = scratchpad_data;
+ ctx.size = scratchpad_shape.Dims(0);
+
+ auto res = arm_convolve_wrapper_s8(&ctx, &conv_params, &quant_params, &input_dims, input_data,
+ &filter_dims, filter_data, &bias_dims, bias_data,
+ &output_dims, output_data);
+ assert(res == ARM_MATH_SUCCESS);
+ }
+ else
+ {
+ tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data);
+ }
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::ConvParams &params,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ cmsis_nn_conv_params conv_params;
+ conv_params.dilation.h = params.dilation_height_factor;
+ conv_params.dilation.w = params.dilation_width_factor;
+
+ if (input_data_type == loco::DataType::S8 && conv_params.dilation.h == 1 &&
+ conv_params.dilation.w == 1)
+ {
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int32_t output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
+ const int32_t filter_height = filter_shape.Dims(1);
+ const int32_t filter_width = filter_shape.Dims(2);
+ const int32_t output_height = output_shape.Dims(1);
+ const int32_t output_width = output_shape.Dims(2);
+
+ conv_params.input_offset = params.input_offset;
+ conv_params.output_offset = params.output_offset;
+ conv_params.stride.h = params.stride_height;
+ conv_params.stride.w = params.stride_width;
+ conv_params.padding.h = params.padding_values.height;
+ conv_params.padding.w = params.padding_values.width;
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batches;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = input_depth;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = output_depth;
+ filter_dims.h = filter_height;
+ filter_dims.w = filter_width;
+ filter_dims.c = input_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batches;
+ output_dims.h = output_height;
+ output_dims.w = output_width;
+ output_dims.c = output_depth;
+
+ const int32_t buf_size = arm_convolve_wrapper_s8_get_buffer_size(&conv_params, &input_dims,
+ &filter_dims, &output_dims);
+
+ luci_interpreter::Shape scratchpad_shape{buf_size};
+ scratchpad->resize(scratchpad_shape);
+ }
+ else
+ {
+ scratchpad->set_allocatable(false);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h
new file mode 100644
index 000000000..8463e571e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+
+#include <tensorflow/lite/kernels/internal/reference/depth_to_space.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::DepthToSpace(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h
new file mode 100644
index 000000000..120dcd803
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &filter_shape,
+ const T *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+ T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ T *scratchpad_data)
+{
+ {
+ // MARK: At this moment this operation is not supported
+ assert(false && "DepthwiseConvPerChannel NYI");
+ (void)params;
+ (void)output_multiplier;
+ (void)output_shift;
+ (void)input_shape;
+ (void)output_data;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+ const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+ if (scratchpad_data)
+ {
+ cmsis_nn_dw_conv_params dw_conv_params;
+ dw_conv_params.dilation.h = params.dilation_height_factor;
+ dw_conv_params.dilation.w = params.dilation_width_factor;
+ assert(dw_conv_params.dilation.h == 1);
+ assert(dw_conv_params.dilation.w == 1);
+
+ dw_conv_params.input_offset = params.input_offset;
+ dw_conv_params.output_offset = params.output_offset;
+ dw_conv_params.stride.h = params.stride_height;
+ dw_conv_params.stride.w = params.stride_width;
+ dw_conv_params.padding.h = params.padding_values.height;
+ dw_conv_params.padding.w = params.padding_values.width;
+
+ dw_conv_params.activation.min = params.quantized_activation_min;
+ dw_conv_params.activation.max = params.quantized_activation_max;
+ dw_conv_params.ch_mult = params.depth_multiplier;
+
+ cmsis_nn_per_channel_quant_params quant_params;
+ int32_t output_multiplier = params.output_multiplier;
+ int32_t output_shift = params.output_shift;
+
+ quant_params.multiplier = &output_multiplier;
+ quant_params.shift = &output_shift;
+
+ assert(dw_conv_params.activation.min <= dw_conv_params.activation.max);
+ const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3);
+ if (bias_data)
+ {
+ assert(bias_shape.FlatSize() == output_depth);
+ }
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batch_size;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = input_shape.Dims(3);
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = filter_shape.Dims(0);
+ filter_dims.h = filter_shape.Dims(1);
+ filter_dims.w = filter_shape.Dims(2);
+ filter_dims.c = output_depth;
+
+ cmsis_nn_dims bias_dims;
+ bias_dims.n = 1;
+ bias_dims.h = 1;
+ bias_dims.w = 1;
+ bias_dims.c = output_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batch_size;
+ output_dims.h = output_shape.Dims(1);
+ output_dims.w = output_shape.Dims(2);
+ output_dims.c = output_depth;
+
+ cmsis_nn_context ctx;
+ ctx.buf = scratchpad_data;
+ ctx.size = scratchpad_shape.Dims(0);
+
+ auto res = arm_depthwise_conv_wrapper_s8(&ctx, &dw_conv_params, &quant_params, &input_dims,
+ input_data, &filter_dims, filter_data, &bias_dims,
+ bias_data, &output_dims, output_data);
+ assert(res == ARM_MATH_SUCCESS);
+ }
+ else
+ {
+ tflite::reference_integer_ops::DepthwiseConvPerChannel(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data);
+ }
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const tflite::DepthwiseParams &params,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ cmsis_nn_dw_conv_params dw_conv_params;
+ dw_conv_params.dilation.h = params.dilation_height_factor;
+ dw_conv_params.dilation.w = params.dilation_width_factor;
+
+ if (input_data_type == loco::DataType::S8 && dw_conv_params.dilation.h == 1 &&
+ dw_conv_params.dilation.w == 1)
+ {
+ const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3);
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batch_size;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = input_shape.Dims(3);
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = filter_shape.Dims(0);
+ filter_dims.h = filter_shape.Dims(1);
+ filter_dims.w = filter_shape.Dims(2);
+ filter_dims.c = output_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batch_size;
+ output_dims.h = output_shape.Dims(1);
+ output_dims.w = output_shape.Dims(2);
+ output_dims.c = output_depth;
+
+ const int32_t buf_size = arm_depthwise_conv_wrapper_s8_get_buffer_size(
+ &dw_conv_params, &input_dims, &filter_dims, &output_dims);
+
+ auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+
+ luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size};
+ scratchpad->resize(scratchpad_shape);
+ }
+ else
+ {
+ scratchpad->set_allocatable(false);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h
new file mode 100644
index 000000000..15ff0327b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape,
+ output_data);
+}
+
+static inline void Dequantize(tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const uint8_t *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALElu.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALElu.h
new file mode 100644
index 000000000..4089d0a0c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALElu.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ELU_H
+#define LUCI_INTERPRETER_PAL_ELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/elu.h>
+
+namespace luci_interpreter_pal
+{
+
+static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::Elu(input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ELU_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h
new file mode 100644
index 000000000..32e905761
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void FullyConnected(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &filter_shape, const T *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ {
+ // MARK: At this moment this operation doesn't support
+ assert(false && "FullyConnected NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ }
+}
+
+template <>
+inline void
+FullyConnected<int8_t>(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+ assert(output_shape.DimensionsCount() == 2);
+
+ const int batches = output_shape.Dims(0);
+ const int output_depth = output_shape.Dims(1);
+
+ const int filter_dim_count = filter_shape.DimensionsCount();
+ const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
+
+ cmsis_nn_fc_params fc_params;
+ fc_params.input_offset = params.input_offset;
+ fc_params.output_offset = params.output_offset;
+ fc_params.filter_offset = params.weights_offset;
+ fc_params.activation.min = params.quantized_activation_min;
+ fc_params.activation.max = params.quantized_activation_max;
+
+ cmsis_nn_per_tensor_quant_params quant_params;
+ quant_params.multiplier = params.output_multiplier;
+ quant_params.shift = params.output_shift;
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batches;
+ input_dims.h = 1;
+ input_dims.w = 1;
+ input_dims.c = accum_depth;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = accum_depth;
+ filter_dims.h = 1;
+ filter_dims.w = 1;
+ filter_dims.c = output_depth;
+
+ cmsis_nn_dims bias_dims;
+ bias_dims.n = 1;
+ bias_dims.h = 1;
+ bias_dims.w = 1;
+ bias_dims.c = output_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batches;
+ output_dims.h = 1;
+ output_dims.w = 1;
+ output_dims.c = output_depth;
+
+ int32_t buf_size = arm_fully_connected_s8_get_buffer_size(&filter_dims);
+ auto buffer = std::make_unique<int8_t[]>(buf_size);
+ assert(buffer != nullptr);
+
+ cmsis_nn_context ctx;
+ ctx.buf = buffer.get();
+ ctx.size = buf_size;
+
+ auto res =
+ arm_fully_connected_s8(&ctx, &fc_params, &quant_params, &input_dims, input_data, &filter_dims,
+ filter_data, &bias_dims, bias_data, &output_dims, output_data);
+ assert(res == ARM_MATH_SUCCESS);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Normalize.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Normalize.h
new file mode 100644
index 000000000..f84742a44
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Normalize.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+
+#include <tensorflow/lite/kernels/internal/reference/l2normalization.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Normalization(const tflite::L2NormalizationParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::L2Normalization(op_params, input_shape, input_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h
new file mode 100644
index 000000000..38a302fc6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H
+#define LUCI_INTERPRETER_PAL_L2POOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::reference_ops::L2Pool(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h
new file mode 100644
index 000000000..9ccd2224f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H
+#define LUCI_INTERPRETER_PAL_LEAKYRELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h>
+
+namespace luci_interpreter_pal
+{
+static inline void LeakyRelu(const tflite::LeakyReluParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALMul.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALMul.h
new file mode 100644
index 000000000..347a97a83
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALMul.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_MUL_H
+#define LUCI_INTERPRETER_PAL_MUL_H
+
+#include <tensorflow/lite/kernels/internal/reference/mul.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void
+BroadcastMul4DSlow(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_MUL_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALNeg.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALNeg.h
new file mode 100644
index 000000000..be5903a0c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALNeg.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_NEG_H
+#define LUCI_INTERPRETER_PAL_NEG_H
+
+#include <tensorflow/lite/kernels/internal/reference/neg.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_NEG_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h
new file mode 100644
index 000000000..6046789ae
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+ int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+ int32_t input_zero_point, int32_t output_zero_point,
+ Output *output_data)
+{
+ tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier,
+ effective_scale_shift, input_zero_point, output_zero_point,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h
new file mode 100644
index 000000000..cc9f0fd54
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_bilinear.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeBilinear(const tflite::ResizeBilinearParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::ResizeBilinear(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h
new file mode 100644
index 000000000..f4d5a6ed3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h
new file mode 100644
index 000000000..a4a5b2a78
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, int16_t *activation_state_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+ int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+ int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+ const int32_t rank = params.rank;
+ const int32_t batch_size = input_shape.Dims(0);
+ const int32_t num_filters = weight_feature_shape.Dims(0);
+ const int32_t memory_size = weight_time_shape.Dims(1);
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = input_shape.Dims(0);
+ input_dims.h = input_shape.Dims(1);
+
+ cmsis_nn_dims weights_feature_dims;
+ weights_feature_dims.n = weight_feature_shape.Dims(0);
+ weights_feature_dims.h = weight_feature_shape.Dims(1);
+
+ cmsis_nn_dims weights_time_dims;
+ weights_time_dims.n = weight_time_shape.Dims(0);
+ weights_time_dims.h = weight_time_shape.Dims(1);
+
+ cmsis_nn_dims bias_dims;
+ bias_dims.n = bias_shape.Dims(0);
+
+ cmsis_nn_dims state_dims;
+ state_dims.n = batch_size;
+ state_dims.h = memory_size * num_filters;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = output_shape.Dims(0);
+ output_dims.h = output_shape.Dims(1);
+
+ cmsis_nn_svdf_params svdf_params;
+ svdf_params.rank = params.rank;
+ svdf_params.input_offset = input_zp;
+ svdf_params.output_offset = output_zp;
+
+ svdf_params.input_activation.min = INT16_MIN;
+ svdf_params.input_activation.max = INT16_MAX;
+
+ svdf_params.output_activation.min = INT8_MIN;
+ svdf_params.output_activation.max = INT8_MAX;
+
+ cmsis_nn_per_tensor_quant_params in_quant_params;
+ in_quant_params.multiplier = scale_1_a;
+ in_quant_params.shift = scale_1_b;
+
+ cmsis_nn_per_tensor_quant_params out_quant_params;
+ out_quant_params.multiplier = scale_2_a;
+ out_quant_params.shift = scale_2_b;
+
+ cmsis_nn_context scratch_ctx;
+ scratch_ctx.buf = scratchpad_data;
+
+ cmsis_nn_context scratch_output_ctx;
+ scratch_output_ctx.buf = output_temp_data;
+
+ arm_svdf_s8(&scratch_ctx, &scratch_output_ctx, &svdf_params, &in_quant_params, &out_quant_params,
+ &input_dims, input_data, &state_dims, activation_state_data, &weights_feature_dims,
+ weight_feature_data, &weights_time_dims, weight_time_data, &bias_dims, bias_data,
+ &output_dims, output_data);
+}
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, float *scratchpad_data, float *activation_state_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ const int32_t rank = params.rank;
+ const int32_t batch_size = input_shape.Dims(0);
+ const int32_t input_size = input_shape.Dims(1);
+ const int32_t num_filters = weight_feature_shape.Dims(0);
+ const int32_t num_units = num_filters / rank;
+ const int32_t memory_size = weight_time_shape.Dims(1);
+
+ // Left shift the activation_state.
+ {
+ float *new_state_start = activation_state_data;
+ const float *old_state_start = activation_state_data + 1;
+ const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size;
+ while (old_state_start != old_state_end)
+ {
+ *new_state_start++ = *old_state_start++;
+ }
+ }
+
+ // Note: no need to clear the latest activation, matmul is not accumulative.
+
+ // Compute conv1d(inputs, weights_feature).
+ // The activation_state's rightmost column is used to save current cycle
+ // activation. This is achieved by starting at state_ptr[memory_size - 1] and
+ // having the stride equal to memory_size.
+
+ // Perform batched matrix vector multiply operation:
+ {
+ const float *matrix = weight_feature_data;
+ const float *vector = input_data;
+ float *result = &activation_state_data[memory_size - 1];
+ float *result_in_batch = result;
+ for (int i = 0; i < batch_size; ++i)
+ {
+ const float *matrix_ptr = matrix;
+ for (int j = 0; j < num_filters; ++j)
+ {
+ float dot_prod = 0.0f;
+ const float *vector_in_batch = vector + i * input_size;
+ for (int k = 0; k < input_size; ++k)
+ {
+ dot_prod += *matrix_ptr++ * *vector_in_batch++;
+ }
+ *result_in_batch = dot_prod;
+ result_in_batch += memory_size;
+ }
+ }
+ }
+
+ tflite::reference_ops::ApplyTimeWeightsBiasAndActivation(
+ batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data,
+ params.activation, activation_state_data, scratchpad_data, output_data);
+}
+
+static inline void SetupScratchpadTensor(
+ const luci_interpreter::DataType &input_data_type,
+ const luci_interpreter::DataType &weight_feature_data_type,
+ luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+ luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+ luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+ const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+ const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+ if (input_data_type == loco::DataType::FLOAT32 &&
+ (weight_feature_data_type == loco::DataType::S8 ||
+ weight_feature_data_type == loco::DataType::U8))
+ {
+ (void)input_shape;
+ (void)weight_time_shape;
+ (void)scratchpad_3;
+ (void)scratchpad_4;
+ (void)scratchpad_5;
+ (void)scratchpad_6;
+
+ throw std::runtime_error("Hybrid type is not supported for cmsisnn");
+ }
+
+ // Resize scratchpad_1 tensor
+ scratchpad_1->resize({batch_size, num_filters});
+
+ if (input_data_type == loco::DataType::S8)
+ {
+ // Resize scratchpad_2 for full_integer op
+ scratchpad_2->resize({batch_size, num_units});
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSoftmax.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSoftmax.h
new file mode 100644
index 000000000..6bbda4867
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSoftmax.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H
+#define LUCI_INTERPRETER_PAL_SOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/softmax.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+ float beta)
+{
+ // Do nothing for mcu
+ (void)data;
+ (void)input_scale;
+ (void)beta;
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+ int32 input_beta_multiplier;
+ int input_beta_left_shift;
+ static const int kScaledDiffIntegerBits = 5;
+ tflite::PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits,
+ &input_beta_multiplier, &input_beta_left_shift);
+
+ params->input_multiplier = input_beta_multiplier;
+ params->input_left_shift = input_beta_left_shift;
+ params->diff_min =
+ -tflite::CalculateInputRadius(kScaledDiffIntegerBits, params->input_left_shift);
+}
+
+template <typename T>
+static inline void Softmax(const tflite::SoftmaxParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ // MARK: At this moment this operation doesn't support on mcu
+ assert(false && "Softmax NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+}
+
+template <>
+inline void Softmax<int8_t>(const tflite::SoftmaxParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+ const int trailing_dim = input_shape.DimensionsCount() - 1;
+ const int outer_size = tflite::MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+ const int depth = tflite::MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+ const int32_t mult = params.input_multiplier;
+ const int32_t shift = params.input_left_shift;
+ const int32_t diff_min = params.diff_min;
+
+ arm_softmax_s8(input_data, outer_size, depth, mult, shift, diff_min, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h
new file mode 100644
index 000000000..fdddaa929
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+SpaceToBatchND(const tflite::SpaceToBatchParams &params,
+ const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::SpaceToBatchND(
+ params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, paddings_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h
new file mode 100644
index 000000000..816b7f663
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_depth.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::SpaceToDepth(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSub.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSub.h
new file mode 100644
index 000000000..ea57578c6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSub.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SUB_H
+#define LUCI_INTERPRETER_PAL_SUB_H
+
+#include <tensorflow/lite/kernels/internal/reference/sub.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Sub(const tflite::ArithmeticParams &params,
+ const tflite::RuntimeShape &input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &input2_shape, const T *input2_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SUB_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/pal.cmake b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/pal.cmake
new file mode 100644
index 000000000..a68b363d9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/pal.cmake
@@ -0,0 +1,65 @@
+macro(initialize_pal)
+ nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
+ nnas_find_package(CMSISSource EXACT 5.8.0 QUIET)
+
+ if (NOT TensorFlowSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: TensorFlow not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowGEMMLowpSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: gemmlowp not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowEigenSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Eigen not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowRuySource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Ruy not found")
+ return()
+ endif ()
+
+ if (NOT CMSISSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: CMSISSource not found")
+ return()
+ endif ()
+
+ set(PAL_INITIALIZED TRUE)
+endmacro()
+
+macro(add_pal_to_target TGT)
+ target_include_directories(${TGT} PRIVATE "${PAL}")
+ target_include_directories(${TGT} PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}")
+ target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
+
+ file(GLOB_RECURSE PAL_SOURCES "${CMSISSource_DIR}/CMSIS/NN/Source/*.c")
+ list(APPEND PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
+ add_library(luci_interpreter_cmsisnn_pal STATIC ${PAL_SOURCES})
+ set_property(TARGET luci_interpreter_cmsisnn_pal PROPERTY POSITION_INDEPENDENT_CODE ON)
+ target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}"
+ )
+
+ add_subdirectory(${CMSISSource_DIR}/CMSIS/NN ${CMAKE_CURRENT_BINARY_DIR}/CMSISNN)
+ target_include_directories(luci_interpreter_cmsisnn_pal PUBLIC
+ "${CMSISSource_DIR}/CMSIS/NN/Include"
+ "${CMSISSource_DIR}/CMSIS/DSP/Include"
+ "${CMSISSource_DIR}/CMSIS/Core/Include")
+
+ target_link_libraries(${TGT} PRIVATE luci_interpreter_cmsisnn_pal)
+endmacro()
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/KernelsToBuild.lst b/compiler/luci-micro/luci-interpreter/pal/linux/KernelsToBuild.lst
new file mode 100644
index 000000000..8e20559f9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/KernelsToBuild.lst
@@ -0,0 +1,77 @@
+REGISTER_KERNEL(Add)
+REGISTER_KERNEL(ArgMax)
+REGISTER_KERNEL(AveragePool2D)
+REGISTER_KERNEL(BatchMatMul)
+REGISTER_KERNEL(BatchToSpaceND)
+REGISTER_KERNEL(Cast)
+REGISTER_KERNEL(Concatenation)
+REGISTER_KERNEL(Conv2D)
+REGISTER_KERNEL(DepthToSpace)
+REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Dequantize)
+REGISTER_KERNEL(Div)
+REGISTER_KERNEL(Elu)
+REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(ExpandDims)
+REGISTER_KERNEL(Fill)
+REGISTER_KERNEL(Floor)
+REGISTER_KERNEL(FloorDiv)
+REGISTER_KERNEL(Equal)
+REGISTER_KERNEL(FullyConnected)
+REGISTER_KERNEL(Gather)
+REGISTER_KERNEL(Greater)
+REGISTER_KERNEL(GreaterEqual)
+REGISTER_KERNEL(If)
+REGISTER_KERNEL(InstanceNorm)
+REGISTER_KERNEL(L2Normalize)
+REGISTER_KERNEL(L2Pool2D)
+REGISTER_KERNEL(LeakyRelu)
+REGISTER_KERNEL(Less)
+REGISTER_KERNEL(LessEqual)
+REGISTER_KERNEL(LocalResponseNormalization)
+REGISTER_KERNEL(LogicalAnd)
+REGISTER_KERNEL(LogicalNot)
+REGISTER_KERNEL(LogicalOr)
+REGISTER_KERNEL(Logistic)
+REGISTER_KERNEL(LogSoftmax)
+REGISTER_KERNEL(Maximum)
+REGISTER_KERNEL(MaxPool2D)
+REGISTER_KERNEL(Mean)
+REGISTER_KERNEL(Minimum)
+REGISTER_KERNEL(MirrorPad)
+REGISTER_KERNEL(Mul)
+REGISTER_KERNEL(Neg)
+REGISTER_KERNEL(NotEqual)
+REGISTER_KERNEL(OneHot)
+REGISTER_KERNEL(Pack)
+REGISTER_KERNEL(Pad)
+REGISTER_KERNEL(PadV2)
+REGISTER_KERNEL(Pow)
+REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Quantize)
+REGISTER_KERNEL(Relu)
+REGISTER_KERNEL(Relu6)
+REGISTER_KERNEL(Reshape)
+REGISTER_KERNEL(ResizeBilinear)
+REGISTER_KERNEL(ResizeNearestNeighbor)
+REGISTER_KERNEL(ReverseV2)
+REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Shape)
+REGISTER_KERNEL(Slice)
+REGISTER_KERNEL(Softmax)
+REGISTER_KERNEL(SpaceToBatchND)
+REGISTER_KERNEL(SpaceToDepth)
+REGISTER_KERNEL(Split)
+REGISTER_KERNEL(SplitV)
+REGISTER_KERNEL(StridedSlice)
+REGISTER_KERNEL(Sqrt)
+REGISTER_KERNEL(Square)
+REGISTER_KERNEL(SquaredDifference)
+REGISTER_KERNEL(Squeeze)
+REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(SVDF)
+REGISTER_KERNEL(Tanh)
+REGISTER_KERNEL(Transpose)
+REGISTER_KERNEL(TransposeConv)
+REGISTER_KERNEL(Unpack)
+REGISTER_KERNEL(While)
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALArgMax.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALArgMax.h
new file mode 100644
index 000000000..21e63296d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALArgMax.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H
+#define LUCI_INTERPRETER_PAL_ARGMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/arg_min_max.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T1, typename T2, typename T3>
+static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data,
+ const T2 *axis, const tflite::RuntimeShape &output_shape,
+ T3 *output_data, const std::greater<T1> cmp)
+{
+ tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ARGMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALAveragePool2d.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALAveragePool2d.h
new file mode 100644
index 000000000..cce30601f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALAveragePool2d.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void AveragePool(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data)
+{
+ {
+ // MARK: At this moment this operation doesn't support
+ assert(false && "AveragePool NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void AveragePool<int8_t>(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape,
+ int8_t *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+
+ tflite::reference_integer_ops::AveragePool(params, input_shape, input_data, output_shape,
+ output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &output_shape)
+
+{
+ (void)input_data_type;
+ (void)input_shape;
+ (void)output_shape;
+
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchMatMul.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchMatMul.h
new file mode 100644
index 000000000..3894f2d92
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchMatMul.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHMATMUL_H
+#define LUCI_INTERPRETER_PAL_BATCHMATMUL_H
+
+#include <tensorflow/lite/kernels/internal/reference/batch_matmul.h>
+
+namespace luci_interpreter_pal
+{
+inline void BatchMatMul(const tflite::RuntimeShape &lhs_shape, const float *lhs_data,
+ const tflite::RuntimeShape &rhs_shape, const float *rhs_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::BatchMatMul(lhs_shape, lhs_data, rhs_shape, rhs_data, output_shape,
+ output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *lhs_scratchpad,
+ luci_interpreter::Tensor *rhs_scratchpad,
+ const tflite::RuntimeShape &lhs_shape,
+ const tflite::RuntimeShape &rhs_shape)
+{
+ // Scratchpad for transposed LHS
+ {
+ auto lhs_rank = lhs_shape.DimensionsCount();
+ luci_interpreter::Shape scratchpad_size(lhs_rank);
+ for (int i = 0; i < lhs_rank - 2; ++i)
+ {
+ scratchpad_size.dim(i) = lhs_shape.Dims(i);
+ }
+ scratchpad_size.dim(lhs_rank - 2) = lhs_shape.Dims(lhs_rank - 1);
+ scratchpad_size.dim(lhs_rank - 1) = lhs_shape.Dims(lhs_rank - 2);
+
+ lhs_scratchpad->resize(scratchpad_size);
+ }
+ // Scratchpad for transposed RHS
+ {
+ auto rhs_rank = rhs_shape.DimensionsCount();
+ luci_interpreter::Shape scratchpad_size(rhs_rank);
+ for (int i = 0; i < rhs_rank - 2; ++i)
+ {
+ scratchpad_size.dim(i) = rhs_shape.Dims(i);
+ }
+ scratchpad_size.dim(rhs_rank - 2) = rhs_shape.Dims(rhs_rank - 1);
+ scratchpad_size.dim(rhs_rank - 1) = rhs_shape.Dims(rhs_rank - 2);
+
+ rhs_scratchpad->resize(scratchpad_size);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHMATMUL_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchToSpaceND.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchToSpaceND.h
new file mode 100644
index 000000000..3fe2022ed
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchToSpaceND.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::BatchToSpaceND(
+ unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, crops_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALConv2d.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALConv2d.h
new file mode 100644
index 000000000..985a15f39
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALConv2d.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+
+#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &filter_shape,
+ const float *filter_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, const tflite::RuntimeShape &output_shape,
+ float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ float *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ if (scratchpad_data)
+ {
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int32_t output_height = output_shape.Dims(1);
+ const int32_t output_width = output_shape.Dims(2);
+ const int32_t filter_height = filter_shape.Dims(1);
+ const int32_t filter_width = filter_shape.Dims(2);
+ tflite::RuntimeShape im2col_shape{batches, output_height, output_width,
+ input_depth * filter_height * filter_width};
+
+ tflite::optimized_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, im2col_shape,
+ scratchpad_data);
+ }
+ else
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data,
+ tflite::RuntimeShape(), nullptr);
+}
+
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ uint8 *scratchpad_data)
+{
+ // TODO This should only be done once (although it takes only a few microseconds).
+ // Also, the user should be able to adjust the number of threads.
+ auto gemmlowp_context = std::make_unique<gemmlowp::GemmContext>();
+ gemmlowp_context->set_max_num_threads(static_cast<int>(std::thread::hardware_concurrency()));
+
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+ scratchpad_data, gemmlowp_context.get());
+}
+
+static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
+ const int32_t *shifts, const tflite::RuntimeShape &input_shape,
+ const int8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ int8 *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ // TODO enable optimized version
+ tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::ConvParams &params,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ const int32_t filter_height = filter_shape.Dims(1);
+ const int32_t filter_width = filter_shape.Dims(2);
+
+ // Allocate tensor for scratchpad, if needed.
+ // The checks here should be aligned with the actual implementation.
+ const bool need_dilated_scratchpad =
+ params.dilation_height_factor != 1 || params.dilation_width_factor != 1;
+ const bool need_non_dilated_scratchpad = params.stride_height != 1 || params.stride_width != 1 ||
+ filter_height != 1 || filter_width != 1;
+ auto _need_scratchpad = input_data_type != luci_interpreter::DataType::S16 &&
+ (need_dilated_scratchpad || need_non_dilated_scratchpad);
+
+ if (_need_scratchpad)
+ {
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int32_t output_height = output_shape.Dims(1);
+ const int32_t output_width = output_shape.Dims(2);
+
+ auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+ int32_t scratchpad_size = batches * output_width * output_height * input_depth * filter_height *
+ filter_width * data_type_size;
+ luci_interpreter::Shape scratchpad_shape{scratchpad_size};
+ scratchpad->resize(scratchpad_shape);
+ }
+ else
+ {
+ scratchpad->set_allocatable(false);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthToSpace.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthToSpace.h
new file mode 100644
index 000000000..f9ebfcfb5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthToSpace.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::DepthToSpace(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthwiseConv2d.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthwiseConv2d.h
new file mode 100644
index 000000000..c9d1a2948
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthwiseConv2d.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &filter_shape,
+ const T *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+ T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ T *scratchpad_data)
+{
+ {
+ // MARK: At this moment this operation is not supported
+ assert(false && "DepthwiseConvPerChannel NYI");
+ (void)params;
+ (void)output_multiplier;
+ (void)output_shift;
+ (void)input_shape;
+ (void)output_data;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+ const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_integer_ops::DepthwiseConvPerChannel(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const tflite::DepthwiseParams &params,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+
+{
+ (void)params;
+ (void)input_data_type;
+ (void)input_shape;
+ (void)filter_shape;
+ (void)output_shape;
+
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALDequantize.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALDequantize.h
new file mode 100644
index 000000000..3af6d0777
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALDequantize.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALElu.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALElu.h
new file mode 100644
index 000000000..cb365ffd0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALElu.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ELU_H
+#define LUCI_INTERPRETER_PAL_ELU_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::Elu(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ELU_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALFullyConnected.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALFullyConnected.h
new file mode 100644
index 000000000..62970dbf7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALFullyConnected.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void FullyConnected(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &filter_shape, const T *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ {
+    // MARK: At this moment this operation does not support this type; only the int8_t specialization below is implemented
+ assert(false && "FullyConnected NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ }
+}
+
+template <>
+inline void
+FullyConnected<int8_t>(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+ tflite::reference_integer_ops::FullyConnected(params, input_shape, input_data, filter_shape,
+ filter_data, bias_shape, bias_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALGather.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALGather.h
new file mode 100644
index 000000000..49ac35f93
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALGather.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_GATHER_H
+#define LUCI_INTERPRETER_PAL_GATHER_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T, typename CoordsT = int32>
+static inline void Gather(const tflite::GatherParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &coords_shape, const CoordsT *coords_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::Gather(op_params, input_shape, input_data, coords_shape, coords_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_GATHER_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Normalize.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Normalize.h
new file mode 100644
index 000000000..6c663e21f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Normalize.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Normalization(const tflite::L2NormalizationParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::L2Normalization(op_params, input_shape, input_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Pool2D.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Pool2D.h
new file mode 100644
index 000000000..aac57f2b2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Pool2D.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H
+#define LUCI_INTERPRETER_PAL_L2POOL2D_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::optimized_ops::L2Pool(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALLeakyRelu.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALLeakyRelu.h
new file mode 100644
index 000000000..e8209bae6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALLeakyRelu.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H
+#define LUCI_INTERPRETER_PAL_LEAKYRELU_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void LeakyRelu(const tflite::LeakyReluParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALLocalResponseNormalization.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALLocalResponseNormalization.h
new file mode 100644
index 000000000..54f7f0916
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALLocalResponseNormalization.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H
+#define LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+LocalResponseNormalization(const tflite::LocalResponseNormalizationParams &op_params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::LocalResponseNormalization(op_params, input_shape, input_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALLogSoftmax.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALLogSoftmax.h
new file mode 100644
index 000000000..a32e3eec6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALLogSoftmax.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LOGSOFTMAX_H
+#define LUCI_INTERPRETER_PAL_LOGSOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+ float beta)
+{
+ tflite::optimized_ops::PopulateSoftmaxLookupTable(data, input_scale, beta);
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+ // Do nothing for linux
+ (void)params;
+ (void)input_scale;
+ (void)beta;
+}
+
+static inline void LogSoftmax(const tflite::SoftmaxParams &params, float input_scale,
+ const tflite::RuntimeShape &input_shape, const uint8 *input_data,
+ const tflite::RuntimeShape &output_shape, uint8 *output_data)
+{
+ tflite::optimized_ops::LogSoftmax(params, input_scale, input_shape, input_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LOGSOFTMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALMul.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALMul.h
new file mode 100644
index 000000000..a8a9d4abc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALMul.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_MUL_H
+#define LUCI_INTERPRETER_PAL_MUL_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::optimized_ops::Mul(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+}
+
+template <>
+inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const int64_t *input1_data, const tflite::RuntimeShape &input2_shape,
+ const int64_t *input2_data, const tflite::RuntimeShape &output_shape,
+ int64_t *output_data)
+{
+ tflite::optimized_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void
+BroadcastMul4DSlow(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_MUL_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALNeg.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALNeg.h
new file mode 100644
index 000000000..797ffee1b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALNeg.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_NEG_H
+#define LUCI_INTERPRETER_PAL_NEG_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_NEG_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALQuantize.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALQuantize.h
new file mode 100644
index 000000000..bf1d7954e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALQuantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+ int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+ int32_t input_zero_point, int32_t output_zero_point,
+ Output *output_data)
+{
+ tflite::optimized_ops::Requantize(input_data, size, effective_scale_multiplier,
+ effective_scale_shift, input_zero_point, output_zero_point,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu.h
new file mode 100644
index 000000000..b4c715d3e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RELU_H
+#define LUCI_INTERPRETER_PAL_RELU_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Relu(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::Relu(input_shape, input_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void ReluX(const tflite::ReluParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::optimized_ops::ReluX(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RELU_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu6.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu6.h
new file mode 100644
index 000000000..bf2f91aa5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu6.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RELU6_H
+#define LUCI_INTERPRETER_PAL_RELU6_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Relu6(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::Relu6(input_shape, input_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void ReluX(const tflite::ReluParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::optimized_ops::ReluX(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RELU6_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeBilinear.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeBilinear.h
new file mode 100644
index 000000000..7380081dc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeBilinear.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+
+#include <tensorflow/lite/kernels/internal/optimized/resize_bilinear.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeBilinear(const tflite::ResizeBilinearParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::ResizeBilinear(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h
new file mode 100644
index 000000000..74d19265b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSVDF.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSVDF.h
new file mode 100644
index 000000000..0ffba14f0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSVDF.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <tensorflow/lite/kernels/internal/reference/svdf.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, int16_t *activation_state_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+ int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+ int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+ tflite::reference_ops::EvalIntegerSVDF(&params, input_shape, input_data, weight_feature_shape,
+ weight_feature_data, weight_time_shape, weight_time_data,
+ bias_shape, bias_data, activation_state_data, output_shape,
+ output_data, scratchpad_data, output_temp_data, scale_1_a,
+ scale_1_b, scale_2_a, scale_2_b, input_zp, output_zp);
+}
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, float *scratchpad_data, float *activation_state_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::EvalFloatSVDF(&params, input_shape, input_data, weight_feature_shape,
+ weight_feature_data, weight_time_shape, weight_time_data,
+ bias_shape, bias_data, scratchpad_data,
+ activation_state_data, output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(
+ const luci_interpreter::DataType &input_data_type,
+ const luci_interpreter::DataType &weight_feature_data_type,
+ luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+ luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+ luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+ const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+ const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+
+ if (input_data_type == loco::DataType::FLOAT32 &&
+ (weight_feature_data_type == loco::DataType::S8 ||
+ weight_feature_data_type == loco::DataType::U8))
+ {
+ (void)input_shape;
+ (void)weight_time_shape;
+ (void)scratchpad_3;
+ (void)scratchpad_4;
+ (void)scratchpad_5;
+ (void)scratchpad_6;
+
+ throw std::runtime_error("Hybrid type is not currently supported for linux platform");
+ }
+
+ // Resize scratchpad_1 tensor
+ scratchpad_1->resize({batch_size, num_filters});
+
+ if (input_data_type == loco::DataType::S8)
+ {
+ // Resize scratchpad_2 for full_integer op
+ scratchpad_2->resize({batch_size, num_units});
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSlice.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSlice.h
new file mode 100644
index 000000000..640a71684
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSlice.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SLICE_H
+#define LUCI_INTERPRETER_PAL_SLICE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Slice(const tflite::SliceParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::Slice(op_params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SLICE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSoftmax.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSoftmax.h
new file mode 100644
index 000000000..b197e79d1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSoftmax.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H
+#define LUCI_INTERPRETER_PAL_SOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+ float beta)
+{
+ tflite::optimized_ops::PopulateSoftmaxLookupTable(data, input_scale, beta);
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+ // Do nothing for linux
+ (void)params;
+ (void)input_scale;
+ (void)beta;
+}
+
+template <typename In, typename Out>
+static inline void Softmax(const tflite::SoftmaxParams &params,
+ const tflite::RuntimeShape &input_shape, const In *input_data,
+ const tflite::RuntimeShape &output_shape, Out *output_data)
+{
+ tflite::optimized_ops::Softmax(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToBatchND.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToBatchND.h
new file mode 100644
index 000000000..5e8de9ba3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToBatchND.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+SpaceToBatchND(const tflite::SpaceToBatchParams &params,
+ const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::SpaceToBatchND(
+ params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, paddings_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToDepth.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToDepth.h
new file mode 100644
index 000000000..52d2a5bb1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToDepth.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::SpaceToDepth(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSplit.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSplit.h
new file mode 100644
index 000000000..4d8da72d8
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSplit.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPLIT_H
+#define LUCI_INTERPRETER_PAL_SPLIT_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename Scalar>
+static inline void Split(const tflite::SplitParams &params, const tflite::RuntimeShape &input_shape,
+ const Scalar *input_data, const tflite::RuntimeShape *const *output_shapes,
+ Scalar *const *output_data)
+{
+ tflite::optimized_ops::Split(params, input_shape, input_data, output_shapes, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPLIT_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSub.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSub.h
new file mode 100644
index 000000000..04080d619
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSub.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SUB_H
+#define LUCI_INTERPRETER_PAL_SUB_H
+
+#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Sub(const tflite::ArithmeticParams &params,
+ const tflite::RuntimeShape &input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &input2_shape, const T *input2_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SUB_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/pal.cmake b/compiler/luci-micro/luci-interpreter/pal/linux/pal.cmake
new file mode 100644
index 000000000..185700cf9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/pal.cmake
@@ -0,0 +1,82 @@
+macro(initialize_pal)
+ nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
+
+ if (NOT TensorFlowSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: TensorFlow not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowGEMMLowpSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: gemmlowp not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowEigenSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Eigen not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowRuySource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Ruy not found")
+ return()
+ endif ()
+
+ find_package(Threads REQUIRED)
+
+ set(PAL_INITIALIZED TRUE)
+endmacro()
+
+macro(add_pal_to_target TGT)
+ target_include_directories(${TGT} PRIVATE "${PAL}")
+ target_include_directories(${TGT} SYSTEM PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}")
+ target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
+
+ # TODO(reviewer): restore the previous source layout — instead of compiling
+ # these TF Lite sources directly, add sources with visitors in this library
+ set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
+
+ if(BUILD_ARM32_NEON)
+ # NOTE may need to revise this list for version upgrade
+ set(PAL_SOURCES ${PAL_SOURCES}
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/cpu_check.cc
+ ${TensorFlowRuySource_DIR}/ruy/allocator.cc
+ ${TensorFlowRuySource_DIR}/ruy/block_map.cc
+ ${TensorFlowRuySource_DIR}/ruy/blocking_counter.cc
+ ${TensorFlowRuySource_DIR}/ruy/context_get_ctx.cc
+ ${TensorFlowRuySource_DIR}/ruy/cpuinfo.cc
+ ${TensorFlowRuySource_DIR}/ruy/ctx.cc
+ ${TensorFlowRuySource_DIR}/ruy/denormal.cc
+ ${TensorFlowRuySource_DIR}/ruy/frontend.cc
+ ${TensorFlowRuySource_DIR}/ruy/pack_arm.cc
+ ${TensorFlowRuySource_DIR}/ruy/prepacked_cache.cc
+ ${TensorFlowRuySource_DIR}/ruy/prepare_packed_matrices.cc
+ ${TensorFlowRuySource_DIR}/ruy/system_aligned_alloc.cc
+ ${TensorFlowRuySource_DIR}/ruy/thread_pool.cc
+ ${TensorFlowRuySource_DIR}/ruy/trmul.cc
+ ${TensorFlowRuySource_DIR}/ruy/tune.cc
+ ${TensorFlowRuySource_DIR}/ruy/wait.cc
+ ${TensorFlowRuySource_DIR}/ruy/kernel_arm32.cc
+ )
+ endif(BUILD_ARM32_NEON)
+
+ add_library(luci_interpreter_linux_pal STATIC ${PAL_SOURCES})
+ set_target_properties(luci_interpreter_linux_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
+ target_include_directories(luci_interpreter_linux_pal SYSTEM PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}"
+ )
+
+ target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_linux_pal)
+endmacro()
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst b/compiler/luci-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst
new file mode 100644
index 000000000..f0df58db3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst
@@ -0,0 +1,62 @@
+REGISTER_KERNEL(Add)
+REGISTER_KERNEL(ArgMax)
+REGISTER_KERNEL(AveragePool2D)
+REGISTER_KERNEL(BatchToSpaceND)
+REGISTER_KERNEL(Cast)
+REGISTER_KERNEL(Concatenation)
+REGISTER_KERNEL(Conv2D)
+REGISTER_KERNEL(DepthToSpace)
+REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Dequantize)
+REGISTER_KERNEL(Div)
+REGISTER_KERNEL(Elu)
+REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(ExpandDims)
+REGISTER_KERNEL(Fill)
+REGISTER_KERNEL(Floor)
+REGISTER_KERNEL(FloorDiv)
+REGISTER_KERNEL(Equal)
+REGISTER_KERNEL(FullyConnected)
+REGISTER_KERNEL(Greater)
+REGISTER_KERNEL(GreaterEqual)
+REGISTER_KERNEL(If)
+REGISTER_KERNEL(InstanceNorm)
+REGISTER_KERNEL(L2Normalize)
+REGISTER_KERNEL(L2Pool2D)
+REGISTER_KERNEL(LeakyRelu)
+REGISTER_KERNEL(Less)
+REGISTER_KERNEL(LessEqual)
+REGISTER_KERNEL(LogicalAnd)
+REGISTER_KERNEL(LogicalNot)
+REGISTER_KERNEL(LogicalOr)
+REGISTER_KERNEL(Logistic)
+REGISTER_KERNEL(Maximum)
+REGISTER_KERNEL(MaxPool2D)
+REGISTER_KERNEL(Minimum)
+REGISTER_KERNEL(MirrorPad)
+REGISTER_KERNEL(Mul)
+REGISTER_KERNEL(Neg)
+REGISTER_KERNEL(NotEqual)
+REGISTER_KERNEL(Pad)
+REGISTER_KERNEL(PadV2)
+REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Quantize)
+REGISTER_KERNEL(Reshape)
+REGISTER_KERNEL(ResizeBilinear)
+REGISTER_KERNEL(ResizeNearestNeighbor)
+REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Shape)
+REGISTER_KERNEL(Softmax)
+REGISTER_KERNEL(SpaceToBatchND)
+REGISTER_KERNEL(SpaceToDepth)
+REGISTER_KERNEL(StridedSlice)
+REGISTER_KERNEL(Sqrt)
+REGISTER_KERNEL(Square)
+REGISTER_KERNEL(SquaredDifference)
+REGISTER_KERNEL(Squeeze)
+REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(SVDF)
+REGISTER_KERNEL(Tanh)
+REGISTER_KERNEL(Transpose)
+REGISTER_KERNEL(TransposeConv)
+REGISTER_KERNEL(While)
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALArgMax.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALArgMax.h
new file mode 100644
index 000000000..21e63296d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALArgMax.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H
+#define LUCI_INTERPRETER_PAL_ARGMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/arg_min_max.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T1, typename T2, typename T3>
+static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data,
+ const T2 *axis, const tflite::RuntimeShape &output_shape,
+ T3 *output_data, const std::greater<T1> cmp)
+{
+ tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ARGMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALAveragePool2d.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALAveragePool2d.h
new file mode 100644
index 000000000..cce30601f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALAveragePool2d.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void AveragePool(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data)
+{
+ {
+ // MARK: At this moment this operation doesn't support
+ assert(false && "AveragePool NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void AveragePool<int8_t>(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape,
+ int8_t *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+
+ tflite::reference_integer_ops::AveragePool(params, input_shape, input_data, output_shape,
+ output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &output_shape)
+
+{
+ (void)input_data_type;
+ (void)input_shape;
+ (void)output_shape;
+
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h
new file mode 100644
index 000000000..4dd77ffdc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+
+#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::BatchToSpaceND(
+ unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, crops_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALConv2d.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALConv2d.h
new file mode 100644
index 000000000..13976877a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALConv2d.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/conv.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &filter_shape,
+ const float *filter_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, const tflite::RuntimeShape &output_shape,
+ float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ float *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data,
+ tflite::RuntimeShape(), nullptr);
+}
+
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ uint8 *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+ scratchpad_data, nullptr);
+}
+
+static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
+ const int32_t *shifts, const tflite::RuntimeShape &input_shape,
+ const int8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ int8 *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::ConvParams &params,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ (void)input_data_type;
+ (void)params;
+ (void)input_shape;
+ (void)filter_shape;
+ (void)output_shape;
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthToSpace.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthToSpace.h
new file mode 100644
index 000000000..8463e571e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthToSpace.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+
+#include <tensorflow/lite/kernels/internal/reference/depth_to_space.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::DepthToSpace(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h
new file mode 100644
index 000000000..c9d1a2948
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &filter_shape,
+ const T *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+ T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ T *scratchpad_data)
+{
+ {
+ // MARK: At this moment this operation is not supported
+ assert(false && "DepthwiseConvPerChannel NYI");
+ (void)params;
+ (void)output_multiplier;
+ (void)output_shift;
+ (void)input_shape;
+ (void)output_data;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+ const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_integer_ops::DepthwiseConvPerChannel(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const tflite::DepthwiseParams &params,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+
+{
+ (void)params;
+ (void)input_data_type;
+ (void)input_shape;
+ (void)filter_shape;
+ (void)output_shape;
+
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALDequantize.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDequantize.h
new file mode 100644
index 000000000..15ff0327b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDequantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape,
+ output_data);
+}
+
+static inline void Dequantize(tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const uint8_t *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALElu.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALElu.h
new file mode 100644
index 000000000..4089d0a0c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALElu.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ELU_H
+#define LUCI_INTERPRETER_PAL_ELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/elu.h>
+
+namespace luci_interpreter_pal
+{
+
+static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::Elu(input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ELU_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALFullyConnected.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALFullyConnected.h
new file mode 100644
index 000000000..048624d74
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALFullyConnected.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void FullyConnected(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &filter_shape, const T *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ {
+ // MARK: At this moment this operation is not supported
+ assert(false && "FullyConnected NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ }
+}
+
+template <>
+inline void
+FullyConnected<int8_t>(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+ tflite::reference_integer_ops::FullyConnected(params, input_shape, input_data, filter_shape,
+ filter_data, bias_shape, bias_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Normalize.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Normalize.h
new file mode 100644
index 000000000..f84742a44
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Normalize.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+
+#include <tensorflow/lite/kernels/internal/reference/l2normalization.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Normalization(const tflite::L2NormalizationParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::L2Normalization(op_params, input_shape, input_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h
new file mode 100644
index 000000000..38a302fc6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H
+#define LUCI_INTERPRETER_PAL_L2POOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::reference_ops::L2Pool(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALLeakyRelu.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALLeakyRelu.h
new file mode 100644
index 000000000..9ccd2224f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALLeakyRelu.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H
+#define LUCI_INTERPRETER_PAL_LEAKYRELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h>
+
+namespace luci_interpreter_pal
+{
+static inline void LeakyRelu(const tflite::LeakyReluParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALMul.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALMul.h
new file mode 100644
index 000000000..347a97a83
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALMul.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_MUL_H
+#define LUCI_INTERPRETER_PAL_MUL_H
+
+#include <tensorflow/lite/kernels/internal/reference/mul.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void
+BroadcastMul4DSlow(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_MUL_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALNeg.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALNeg.h
new file mode 100644
index 000000000..be5903a0c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALNeg.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_NEG_H
+#define LUCI_INTERPRETER_PAL_NEG_H
+
+#include <tensorflow/lite/kernels/internal/reference/neg.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_NEG_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALQuantize.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALQuantize.h
new file mode 100644
index 000000000..6046789ae
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALQuantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+ int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+ int32_t input_zero_point, int32_t output_zero_point,
+ Output *output_data)
+{
+ tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier,
+ effective_scale_shift, input_zero_point, output_zero_point,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeBilinear.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeBilinear.h
new file mode 100644
index 000000000..cc9f0fd54
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeBilinear.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_bilinear.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeBilinear(const tflite::ResizeBilinearParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::ResizeBilinear(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h
new file mode 100644
index 000000000..f4d5a6ed3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSVDF.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSVDF.h
new file mode 100644
index 000000000..3bba668fb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSVDF.h
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <tensorflow/lite/kernels/internal/reference/svdf.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, int16_t *activation_state_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+ int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+ int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+ const int n_rank = params.rank;
+ const int n_batch = input_shape.Dims(0);
+ const int n_input = input_shape.Dims(1);
+ const int n_filter = weight_feature_shape.Dims(0);
+ const int n_unit = n_filter / n_rank;
+ const int n_memory = weight_time_shape.Dims(1);
+
+ // Left shift the activation_state.
+ {
+ int16_t *new_state_start = activation_state_data;
+ const int16_t *old_state_start = activation_state_data + 1;
+ const int16_t *old_state_end = activation_state_data + n_batch * n_filter * n_memory;
+ while (old_state_start != old_state_end)
+ {
+ *new_state_start++ = *old_state_start++;
+ }
+ }
+
+ // Note: no need to clear the latest activation, matmul is not accumulative.
+
+ // Feature matmul.
+ {
+ const int32_t output_max = std::numeric_limits<int16_t>::max();
+ const int32_t output_min = std::numeric_limits<int16_t>::min();
+ int16_t *result_in_batch = activation_state_data + (n_memory - 1);
+ for (int b = 0; b < n_batch; b++)
+ {
+ const int8_t *matrix_ptr = weight_feature_data;
+ for (int r = 0; r < n_filter; r++)
+ {
+ int32_t dot_prod = 0;
+ const int8_t *vector_in_batch = input_data + b * n_input;
+ for (int c = 0; c < n_input; c++)
+ {
+ dot_prod += *matrix_ptr++ * (*vector_in_batch++ - input_zp);
+ }
+ dot_prod = tflite::MultiplyByQuantizedMultiplier(dot_prod, scale_1_a, scale_1_b);
+ dot_prod = std::min(std::max(output_min, dot_prod), output_max);
+ // This assumes state is symmetrically quantized. Otherwise last bit of
+ // state should be initialized to its zero point and accumulate the
+ // dot_prod.
+ // Equivalent as the following:
+ // result_in_batch = zero point, which happens to be zero.
+ // result_in_batch += dot_prod_56.
+ *result_in_batch = dot_prod;
+ result_in_batch += n_memory;
+ }
+ }
+ }
+
+ // Time.
+ {
+ for (int b = 0; b < n_batch; ++b)
+ {
+ int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
+
+ // Perform batched vector dot product:
+ const int16_t *vector1_ptr = weight_time_data;
+ const int16_t *vector2_ptr = activation_state_data + b * n_memory * n_filter;
+
+ for (int i = 0; i < n_filter; i++)
+ {
+ *scratch_ptr_batch = 0;
+ for (int j = 0; j < n_memory; j++)
+ {
+ *scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
+ }
+ scratch_ptr_batch++;
+ }
+ }
+ }
+
+ // Reduce, add bias, rescale, activation.
+ {
+ // Add bias.
+ if (bias_data)
+ {
+ // Vector batch assign:
+ for (int i = 0; i < n_batch; ++i)
+ {
+ int32_t *output_ptr = output_temp_data + i * n_unit;
+ const int32_t *bias_ptr = bias_data;
+ for (int j = 0; j < n_unit; ++j)
+ {
+ *output_ptr++ = *bias_ptr++;
+ }
+ }
+ }
+ else
+ {
+ int32_t *output_ptr = output_temp_data;
+ for (int i = 0; i < n_batch * n_unit; ++i)
+ {
+ *output_ptr++ = 0;
+ }
+ }
+
+ // Reduce.
+ for (int b = 0; b < n_batch; ++b)
+ {
+ int32_t *output_temp_ptr = output_temp_data + b * n_unit;
+ int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
+
+ // Reduction sum vector
+ for (int i = 0; i < n_unit; ++i)
+ {
+ for (int j = 0; j < n_rank; ++j)
+ {
+ output_temp_ptr[i] += *scratch_ptr_batch++;
+ }
+ }
+ }
+
+ // Rescale.
+ const int32_t output_max = std::numeric_limits<int8_t>::max();
+ const int32_t output_min = std::numeric_limits<int8_t>::min();
+ for (int i = 0; i < n_batch * n_unit; ++i)
+ {
+ int32_t x1 = output_temp_data[i];
+ int32_t x2 = tflite::MultiplyByQuantizedMultiplier(x1, scale_2_a, scale_2_b);
+ int32_t x3 = x2 + output_zp;
+ int32_t x4 = std::min(std::max(output_min, x3), output_max);
+ output_data[i] = static_cast<int8_t>(x4);
+ }
+ }
+}
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, float *scratchpad_data, float *activation_state_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ const int32_t rank = params.rank;
+ const int32_t batch_size = input_shape.Dims(0);
+ const int32_t input_size = input_shape.Dims(1);
+ const int32_t num_filters = weight_feature_shape.Dims(0);
+ const int32_t num_units = num_filters / rank;
+ const int32_t memory_size = weight_time_shape.Dims(1);
+
+ // Left shift the activation_state.
+ {
+ float *new_state_start = activation_state_data;
+ const float *old_state_start = activation_state_data + 1;
+ const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size;
+ while (old_state_start != old_state_end)
+ {
+ *new_state_start++ = *old_state_start++;
+ }
+ }
+
+ // Note: no need to clear the latest activation, matmul is not accumulative.
+
+ // Compute conv1d(inputs, weights_feature).
+ // The activation_state's rightmost column is used to save current cycle
+ // activation. This is achieved by starting at state_ptr[memory_size - 1] and
+ // having the stride equal to memory_size.
+
+ // Perform batched matrix vector multiply operation:
+ {
+ const float *matrix = weight_feature_data;
+ const float *vector = input_data;
+ float *result = &activation_state_data[memory_size - 1];
+ float *result_in_batch = result;
+ for (int i = 0; i < batch_size; ++i)
+ {
+ const float *matrix_ptr = matrix;
+ for (int j = 0; j < num_filters; ++j)
+ {
+ float dot_prod = 0.0f;
+ const float *vector_in_batch = vector + i * input_size;
+ for (int k = 0; k < input_size; ++k)
+ {
+ dot_prod += *matrix_ptr++ * *vector_in_batch++;
+ }
+ *result_in_batch = dot_prod;
+ result_in_batch += memory_size;
+ }
+ }
+ }
+
+ tflite::reference_ops::ApplyTimeWeightsBiasAndActivation(
+ batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data,
+ params.activation, activation_state_data, scratchpad_data, output_data);
+}
+
+static inline void SetupScratchpadTensor(
+ const luci_interpreter::DataType &input_data_type,
+ const luci_interpreter::DataType &weight_feature_data_type,
+ luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+ luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+ luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+ const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+ const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+
+ if (input_data_type == loco::DataType::FLOAT32 &&
+ (weight_feature_data_type == loco::DataType::S8 ||
+ weight_feature_data_type == loco::DataType::U8))
+ {
+ (void)input_shape;
+ (void)weight_time_shape;
+ (void)scratchpad_3;
+ (void)scratchpad_4;
+ (void)scratchpad_5;
+ (void)scratchpad_6;
+
+ throw std::runtime_error("Hybrid type is not currently supported for mcu platform");
+ }
+
+ // Resize scratchpad_1 tensor
+ scratchpad_1->resize({batch_size, num_filters});
+
+ if (input_data_type == loco::DataType::S8)
+ {
+ // Resize scratchpad_2 for full_integer op
+ scratchpad_2->resize({batch_size, num_units});
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSoftmax.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSoftmax.h
new file mode 100644
index 000000000..9838b542d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSoftmax.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H
+#define LUCI_INTERPRETER_PAL_SOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/softmax.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+ float beta)
+{
+ // Do nothing for mcu
+ (void)data;
+ (void)input_scale;
+ (void)beta;
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+ int32 input_beta_multiplier;
+ int input_beta_left_shift;
+ static const int kScaledDiffIntegerBits = 5;
+ tflite::PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits,
+ &input_beta_multiplier, &input_beta_left_shift);
+
+ params->input_multiplier = input_beta_multiplier;
+ params->input_left_shift = input_beta_left_shift;
+ params->diff_min =
+ -tflite::CalculateInputRadius(kScaledDiffIntegerBits, params->input_left_shift);
+}
+
+template <typename T>
+static inline void Softmax(const tflite::SoftmaxParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ // MARK: At this moment this operation doesn't support on mcu
+ assert(false && "Softmax NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToBatchND.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToBatchND.h
new file mode 100644
index 000000000..fdddaa929
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToBatchND.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+SpaceToBatchND(const tflite::SpaceToBatchParams &params,
+ const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::SpaceToBatchND(
+ params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, paddings_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToDepth.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToDepth.h
new file mode 100644
index 000000000..816b7f663
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToDepth.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_depth.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::SpaceToDepth(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSub.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSub.h
new file mode 100644
index 000000000..ea57578c6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSub.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SUB_H
+#define LUCI_INTERPRETER_PAL_SUB_H
+
+#include <tensorflow/lite/kernels/internal/reference/sub.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Sub(const tflite::ArithmeticParams &params,
+ const tflite::RuntimeShape &input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &input2_shape, const T *input2_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SUB_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/pal.cmake b/compiler/luci-micro/luci-interpreter/pal/mcu/pal.cmake
new file mode 100644
index 000000000..907d51de6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/pal.cmake
@@ -0,0 +1,56 @@
+macro(initialize_pal)
+ nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
+
+ if (NOT TensorFlowSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: TensorFlow not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowGEMMLowpSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: gemmlowp not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowEigenSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Eigen not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowRuySource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Ruy not found")
+ return()
+ endif ()
+ #find_package(Threads REQUIRED)
+
+ set(PAL_INITIALIZED TRUE)
+endmacro()
+
+macro(add_pal_to_target TGT)
+ target_include_directories(${TGT} PRIVATE "${PAL}")
+ target_include_directories(${TGT} PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}")
+ target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
+
+  # TODO: revisit this — instead of compiling these TensorFlow sources here,
+  # consider adding sources with visitors in this library.
+ set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
+ add_library(luci_interpreter_mcu_pal STATIC ${PAL_SOURCES})
+ set_target_properties(luci_interpreter_mcu_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
+ target_include_directories(luci_interpreter_mcu_pal PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}"
+ )
+
+ target_link_libraries(${TGT} PRIVATE luci_interpreter_mcu_pal)
+ #target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_mcu_pal)
+endmacro()
diff --git a/compiler/luci-micro/luci-interpreter/requires.cmake b/compiler/luci-micro/luci-interpreter/requires.cmake
new file mode 100644
index 000000000..f411f387a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/requires.cmake
@@ -0,0 +1 @@
+require(luci)
diff --git a/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.cpp b/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.cpp
new file mode 100644
index 000000000..6ad1f320c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/BuddyMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+BuddyMemoryManager::BuddyMemoryManager(uint8_t *memory_start, int32_t memSize)
+{
+ int32_t p = lowerLog2(memSize);
+
+ // We assume that the requested size of memory does not exceed 4 GB
+ assert(p < 32);
+ memSize = 1 << p;
+
+ _start_block = reinterpret_cast<Block *>(memory_start);
+ _start_block->size = memSize - sizeof(Block);
+ _start_block->is_free = true;
+ _start_block->self = _start_block;
+ _num_blocks = 0;
+ _size = _start_block->size;
+
+ for (auto &_free_block : _free_blocks)
+ _free_block = nullptr;
+
+ addToBlocks(_start_block, p);
+}
+
+void BuddyMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+ const size_t element_size = getDataTypeSize(tensor.element_type());
+ const int32_t num_elements = tensor.shape().num_elements();
+ auto size = num_elements * element_size;
+ auto footprint = size + sizeof(Block);
+ auto l = (footprint & (footprint - 1)) == 0
+ ? lowerLog2(footprint)
+ : lowerLog2(footprint) + 1; // check footprint is pow_of_2
+
+ while (l < 32 && !_free_blocks[l])
+ l++;
+
+ assert(l < 32);
+
+ Block *tmp;
+ tmp = _free_blocks[l];
+ removeFromBlocks(tmp, l);
+
+ while ((tmp->size + sizeof(Block)) / 2 >= size + sizeof(Block))
+ {
+ divideBlock(tmp, l);
+ l--;
+ }
+
+ tmp->is_free = false;
+ tmp->self = tmp;
+ _num_blocks++;
+
+ auto *data = (uint8_t *)(tmp + 1);
+ tensor.set_data_buffer(data);
+}
+
+void BuddyMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+ auto data = tensor.data<void>();
+ auto *tmp = (Block *)((uint8_t *)data - sizeof(Block));
+
+ assert(tmp->self == tmp);
+
+ tmp->is_free = true;
+ addToBlocks(tmp, lowerLog2(tmp->size + sizeof(Block)));
+
+ while (tmp)
+ if (tmp->size == _size)
+ break;
+ else
+ tmp = mergeBlock(tmp);
+
+ _num_blocks--;
+ tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.test.cpp b/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.test.cpp
new file mode 100644
index 000000000..29fb767b7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.test.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/BuddyMemoryManager.h"
+#include <gtest/gtest.h>
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(BuddyMemoryManager, basic)
+{
+ auto mem_pool = std::make_unique<uint8_t[]>(200);
+ auto buddy_memory_manager = std::make_unique<BuddyMemoryManager>(mem_pool.get(), 130);
+ Tensor first_tensor(DataType::U8, Shape({8}), AffineQuantization{}, "first_tensor");
+
+ buddy_memory_manager->allocate_memory(first_tensor);
+
+ uint8_t data_1[] = {1, 2, 3, 4, 5, 6, 7, 8};
+
+ first_tensor.writeData(data_1, 8);
+ uint8_t array_1[8];
+ first_tensor.readData(array_1, 8);
+ for (int i = 0; i < 8; i++)
+ {
+ EXPECT_EQ(data_1[i], array_1[i]);
+ }
+
+ Tensor second_tensor(DataType::U8, Shape({2, 5}), AffineQuantization{}, "second_tensor");
+ buddy_memory_manager->allocate_memory(second_tensor);
+
+ uint8_t data_2[2][5] = {{11, 22, 33, 44, 55}, {12, 23, 34, 45, 56}};
+ second_tensor.writeData(data_2, 10);
+
+ uint8_t array_2[2][5];
+ second_tensor.readData(array_2, 10);
+ for (int i = 0; i < 2; i++)
+ {
+ for (int j = 0; j < 5; j++)
+ {
+ EXPECT_EQ(data_2[i][j], array_2[i][j]);
+ }
+ }
+
+ buddy_memory_manager->release_memory(first_tensor);
+ EXPECT_EQ(first_tensor.data<void>(), nullptr);
+
+ buddy_memory_manager->release_memory(second_tensor);
+ EXPECT_EQ(second_tensor.data<void>(), nullptr);
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/CMakeLists.txt
new file mode 100644
index 000000000..997b75a84
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/CMakeLists.txt
@@ -0,0 +1,61 @@
+include("${LUCI_INTERPRETER_PAL_DIR}/pal.cmake")
+
+initialize_pal()
+
+if (NOT PAL_INITIALIZED)
+  message("PAL failed to initialize; skipping luci-interpreter")
+ return()
+endif()
+
+message(STATUS "LUCI INTERPRETER BEGIN")
+
+set(LUCI_INTERPRETER_BINARY "luci_interpreter${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_CORE "luci_interpreter_core${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_KERNELS "luci_interpreter_kernels${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_LOADER "luci_interpreter_loader${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_IMPORT "luci_interpreter_import${LUCI_INTERPRETER_SUFFIX}")
+
+add_subdirectory(core)
+message(STATUS "LUCI INTERPRETER CORE")
+add_subdirectory(kernels)
+message(STATUS "LUCI INTERPRETER KERNELS")
+add_subdirectory(loader)
+message(STATUS "LUCI INTERPRETER LOADER")
+add_subdirectory(import)
+message(STATUS "LUCI INTERPRETER IMPORT")
+
+message(STATUS "LUCI INTERPRETER INITIALIZED")
+
+set(SOURCES
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/Interpreter.h"
+ Interpreter.cpp "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h" SimpleMemoryManager.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/TestMemoryManager.h" TestMemoryManager.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/BuddyMemoryManager.h" BuddyMemoryManager.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/StaticMemoryManager.h" StaticMemoryManager.cpp)
+
+if (NOT LUCI_INTERPRETER_STATIC)
+ add_library(${LUCI_INTERPRETER_BINARY} SHARED ${SOURCES})
+else ()
+ add_library(${LUCI_INTERPRETER_BINARY} STATIC ${SOURCES})
+endif ()
+
+set(TEST_SOURCES BuddyMemoryManager.test.cpp)
+
+target_include_directories(${LUCI_INTERPRETER_BINARY} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_include_directories(${LUCI_INTERPRETER_BINARY} PRIVATE "${LUCI_INTERPRETER_SOURCE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_BINARY}
+ PUBLIC luci_lang ${LUCI_INTERPRETER_LOADER} ${LUCI_INTERPRETER_CORE}
+ PRIVATE nncc_common)
+
+install(TARGETS ${LUCI_INTERPRETER_BINARY} DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+ FILES_MATCHING PATTERN "*.h")
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(buddy_manager_test ${TEST_SOURCES})
+target_link_libraries(buddy_manager_test ${LUCI_INTERPRETER_BINARY})
diff --git a/compiler/luci-micro/luci-interpreter/src/Interpreter.cpp b/compiler/luci-micro/luci-interpreter/src/Interpreter.cpp
new file mode 100644
index 000000000..8cf272efd
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/Interpreter.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/Interpreter.h"
+#include "luci_interpreter/SimpleMemoryManager.h"
+
+#include "loader/ModuleLoader.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace
+{
+
+class EventNotifierImpl final : public EventNotifier
+{
+public:
+ EventNotifierImpl(const RuntimeToIR &runtime_to_ir,
+ const std::vector<ExecutionObserver *> &observers)
+ : _runtime_to_ir(runtime_to_ir), _observers(observers)
+ {
+ }
+
+ void postTensorWrite(const Tensor *tensor) override
+ {
+ assert(tensor != nullptr);
+ for (const auto &observer : _observers)
+ {
+ observer->postTensorWrite(_runtime_to_ir.tensor_to_node.at(tensor), tensor);
+ }
+ }
+
+ void preOperatorExecute(const Kernel *kernel) override
+ {
+ assert(kernel != nullptr);
+ for (const auto &observer : _observers)
+ {
+ observer->preOperatorExecute(_runtime_to_ir.kernel_to_node.at(kernel));
+ }
+ }
+
+ void postOperatorExecute(const Kernel *kernel) override
+ {
+ assert(kernel != nullptr);
+ for (const auto &observer : _observers)
+ {
+ observer->postOperatorExecute(_runtime_to_ir.kernel_to_node.at(kernel));
+ }
+ }
+
+private:
+ const RuntimeToIR &_runtime_to_ir;
+ const std::vector<ExecutionObserver *> &_observers;
+};
+
+} // namespace
+
+Interpreter::Interpreter(const luci::Module *module)
+{
+ _runtime_to_ir = std::make_unique<RuntimeToIR>();
+ _event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers);
+ _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get());
+
+ _default_memory_manager = std::make_unique<SimpleMemoryManager>();
+
+ ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor,
+ _default_memory_manager.get());
+ loader.load();
+}
+
+Interpreter::Interpreter(const luci::Module *module,
+ luci_interpreter::IMemoryManager *memory_manager)
+{
+ assert(memory_manager && "Use Interpreter::Interpreter(module) constructor instead");
+
+ _runtime_to_ir = std::make_unique<RuntimeToIR>();
+ _event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers);
+ _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get());
+
+ ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor,
+ memory_manager);
+ loader.load();
+}
+
+Interpreter::~Interpreter() = default;
+
+void Interpreter::writeInputTensor(const luci::CircleInput *input_node, const void *data,
+ size_t data_size)
+{
+ Tensor *tensor = _runtime_module->getInputTensors()[input_node->index()];
+ if (tensor == nullptr)
+ {
+ const std::string &name = input_node->name();
+ throw std::runtime_error("Cannot find tensor for input node named \"" + name + "\".");
+ }
+ if (data != nullptr)
+ tensor->writeData(data, data_size);
+}
+
+void Interpreter::readOutputTensor(const luci::CircleOutput *output_node, void *data,
+ size_t data_size)
+{
+ Tensor *tensor = _runtime_module->getOutputTensors()[output_node->index()];
+ if (tensor == nullptr)
+ {
+ const std::string &name = output_node->name();
+ throw std::runtime_error("Cannot find tensor for output node named \"" + name + "\".");
+ }
+ if (data != nullptr)
+ tensor->readData(data, data_size);
+}
+
+void Interpreter::interpret() { _runtime_module->execute(); }
+
+void Interpreter::attachObserver(ExecutionObserver *observer)
+{
+ if (std::find(_observers.cbegin(), _observers.cend(), observer) != _observers.cend())
+ throw std::runtime_error("Observer is already attached.");
+ _observers.push_back(observer);
+}
+
+ExecutionObserver::~ExecutionObserver() = default;
+
+void ExecutionObserver::postTensorWrite(const luci::CircleNode *, const Tensor *) {}
+
+void ExecutionObserver::preOperatorExecute(const luci::CircleNode *) {}
+
+void ExecutionObserver::postOperatorExecute(const luci::CircleNode *) {}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/SimpleMemoryManager.cpp b/compiler/luci-micro/luci-interpreter/src/SimpleMemoryManager.cpp
new file mode 100644
index 000000000..230e39896
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/SimpleMemoryManager.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/SimpleMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+void SimpleMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+ if (!tensor.is_allocatable())
+ {
+ return;
+ }
+ if (tensor.is_data_allocated())
+ {
+ release_memory(tensor);
+ }
+ const auto element_size = getDataTypeSize(tensor.element_type());
+ const auto num_elements = tensor.shape().num_elements();
+
+ auto *data = new uint8_t[num_elements * element_size];
+ tensor.set_data_buffer(data);
+}
+
+void SimpleMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+ if (!tensor.is_data_allocated())
+ {
+ tensor.set_data_buffer(nullptr);
+ return;
+ }
+ auto data = tensor.data<uint8_t>();
+ delete[] data;
+ tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/StaticMemoryManager.cpp b/compiler/luci-micro/luci-interpreter/src/StaticMemoryManager.cpp
new file mode 100644
index 000000000..73a819919
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/StaticMemoryManager.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/StaticMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+void StaticMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+ if (!tensor.is_allocatable())
+ {
+ return;
+ }
+ int32_t offset = tensor.get_offset();
+ assert(offset >= 0);
+ auto tensor_ptr = _buffer_ptr + offset;
+ tensor.set_data_buffer(tensor_ptr);
+}
+
+void StaticMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+ tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/TestMemoryManager.cpp b/compiler/luci-micro/luci-interpreter/src/TestMemoryManager.cpp
new file mode 100644
index 000000000..3beeee55c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/TestMemoryManager.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+void TestMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+ if (!tensor.is_allocatable())
+ {
+ return;
+ }
+ if (tensor.is_data_allocated())
+ {
+ release_memory(tensor);
+ }
+ const auto element_size = getDataTypeSize(tensor.element_type());
+ const auto num_elements = tensor.shape().num_elements();
+
+ auto *data = new uint8_t[num_elements * element_size];
+ allocations.push_back(data);
+ tensor.set_data_buffer(data);
+}
+
+void TestMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+ tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/core/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/core/CMakeLists.txt
new file mode 100644
index 000000000..c2471e01c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/CMakeLists.txt
@@ -0,0 +1,19 @@
+set(SOURCES
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/DataType.h"
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/Tensor.h"
+ EventNotifier.h
+ Kernel.h
+ KernelParams.h
+ RuntimeGraph.h
+ RuntimeGraph.cpp
+ RuntimeModule.h
+ Tensor.cpp)
+
+add_library(${LUCI_INTERPRETER_CORE} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_CORE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC luci_lang)
+target_link_libraries(${LUCI_INTERPRETER_CORE} PRIVATE nncc_common)
diff --git a/compiler/luci-micro/luci-interpreter/src/core/EventNotifier.h b/compiler/luci-micro/luci-interpreter/src/core/EventNotifier.h
new file mode 100644
index 000000000..5c4fbd3be
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/EventNotifier.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_EVENTNOTIFIER_H
+#define LUCI_INTERPRETER_CORE_EVENTNOTIFIER_H
+
+namespace luci_interpreter
+{
+
+// Used at execution stage to tell the interpreter that the runtime state has changed in some way.
+class EventNotifier
+{
+public:
+ virtual ~EventNotifier() = default;
+
+ virtual void postTensorWrite(const Tensor *tensor) = 0;
+ virtual void preOperatorExecute(const Kernel *kernel) = 0;
+ virtual void postOperatorExecute(const Kernel *kernel) = 0;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_EVENTNOTIFIER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/core/Kernel.h b/compiler/luci-micro/luci-interpreter/src/core/Kernel.h
new file mode 100644
index 000000000..a7c4a4218
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/Kernel.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_KERNEL_H
+#define LUCI_INTERPRETER_CORE_KERNEL_H
+
+#include "luci_interpreter/core/Tensor.h"
+
+#include <vector>
+
+namespace luci_interpreter
+{
+
+// Base class for all kernels.
+class Kernel
+{
+protected:
+ Kernel(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs)
+ : _inputs(std::move(inputs)), _outputs(std::move(outputs))
+ {
+ }
+
+public:
+ virtual ~Kernel() = default;
+
+ const std::vector<const Tensor *> &getInputTensors() const { return _inputs; }
+ const std::vector<Tensor *> &getOutputTensors() const { return _outputs; }
+
+ // Configures the kernel.
+ // This function is currently called once for each kernel during interpreter construction,
+ // which makes it a convenient place for preparing (resizing) output tensors.
+ virtual void configure() = 0;
+
+ // Executes the kernel.
+ virtual void execute() const = 0;
+
+protected:
+ // NOTE Prefer not to use these in derived classes.
+ const std::vector<const Tensor *> _inputs;
+ const std::vector<Tensor *> _outputs;
+};
+
+// Base class for kernels with parameters.
+template <typename Params> class KernelWithParams : public Kernel
+{
+protected:
+ KernelWithParams(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs,
+ const Params &params)
+ : Kernel(std::move(inputs), std::move(outputs)), _params(params)
+ {
+ }
+
+public:
+ const Params &params() const { return _params; }
+
+protected:
+ const Params _params;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_KERNEL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/core/KernelParams.h b/compiler/luci-micro/luci-interpreter/src/core/KernelParams.h
new file mode 100644
index 000000000..6c0220c62
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/KernelParams.h
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_KERNELPARAMS_H
+#define LUCI_INTERPRETER_CORE_KERNELPARAMS_H
+
+#include <luci/IR/AttrPadding.h>
+#include <luci/IR/AttrFusedActFunc.h>
+#include <luci/IR/AttrMirrorPadMode.h>
+#include <luci_interpreter/core/DataType.h>
+
+#include <cstdint>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+// Inject commonly used types into `luci_interpreter` namespace for convenience.
+using Activation = luci::FusedActFunc;
+using Padding = luci::Padding;
+using MirrorPadMode = luci::MirrorPadMode;
+
+struct AddParams
+{
+ Activation activation;
+};
+
+struct ArgMaxParams
+{
+ DataType output_type;
+};
+
+struct BatchMatMulParams
+{
+ bool adj_x;
+ bool adj_y;
+};
+
+struct ConcatenationParams
+{
+ int axis;
+ Activation activation;
+};
+
+struct Conv2DParams
+{
+ Padding padding;
+ int32_t stride_height;
+ int32_t stride_width;
+ int32_t dilation_height_factor;
+ int32_t dilation_width_factor;
+ Activation activation;
+};
+
+struct DepthToSpaceParams
+{
+ int block_size;
+};
+
+struct DepthwiseConv2DParams
+{
+ Padding padding;
+ int32_t depth_multiplier; // TODO Remove, as it can be calculated.
+ int32_t stride_height;
+ int32_t stride_width;
+ int32_t dilation_height_factor;
+ int32_t dilation_width_factor;
+ Activation activation;
+};
+
+struct DivParams
+{
+ Activation activation;
+};
+
+struct FullyConnectedParams
+{
+ Activation activation;
+ bool keep_num_dims = false;
+};
+
+struct GatherParams
+{
+ int32_t axis;
+ int32_t batch_dims;
+};
+
+struct InstanceNormParams
+{
+ float epsilon;
+ Activation activation;
+};
+
+struct L2NormParams
+{
+ Activation activation;
+};
+
+struct LeakyReluParams
+{
+ float alpha;
+};
+
+struct LocalResponseNormalizationParams
+{
+ int32_t radius;
+ float bias;
+ float alpha;
+ float beta;
+};
+
+struct MirrorPadParams
+{
+ MirrorPadMode mode;
+};
+
+struct MulParams
+{
+ Activation activation;
+};
+
+struct OneHotParams
+{
+ int32_t axis;
+};
+
+struct PackParams
+{
+ int32_t values_count;
+ int32_t axis;
+};
+
+struct Pool2DParams
+{
+ Padding padding;
+ int32_t filter_height;
+ int32_t filter_width;
+ int32_t stride_height;
+ int32_t stride_width;
+ Activation activation;
+};
+
+struct ReducerParams
+{
+ bool keep_dims;
+};
+
+struct ResizeBilinearParams
+{
+ bool align_corners;
+ bool half_pixel_centers;
+};
+
+struct ResizeNearestNeighborParams
+{
+ bool align_corners;
+ bool half_pixel_centers;
+};
+
+struct ShapeParams
+{
+ loco::DataType out_type;
+};
+
+struct SubParams
+{
+ Activation activation;
+};
+
+struct SVDFParams
+{
+ bool asymmetric_quantize_inputs;
+ int32_t svdf_rank;
+ Activation activation;
+};
+
+struct SpaceToDepthParams
+{
+ int block_size;
+};
+
+struct SoftmaxParams
+{
+ float beta;
+};
+
+struct StridedSliceParams
+{
+ int32_t begin_mask;
+ int32_t end_mask;
+ int32_t ellipsis_mask;
+ int32_t new_axis_mask;
+ int32_t shrink_axis_mask;
+};
+
+struct SqueezeParams
+{
+ std::vector<int32_t> squeeze_dims;
+};
+
+struct TransposeConvParams
+{
+ Padding padding;
+ int32_t stride_height;
+ int32_t stride_width;
+};
+
+struct UnpackParams
+{
+ int axis;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_KERNELPARAMS_H
diff --git a/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.cpp b/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.cpp
new file mode 100644
index 000000000..c2f8d2ea8
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.cpp
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "core/RuntimeGraph.h"
+
+#include "core/RuntimeModule.h"
+
+#include <algorithm>
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class RuntimeGraph::TensorAllocPlan
+{
+ std::vector<std::vector<Tensor *>> _alloc_plan;
+ std::vector<std::vector<Tensor *>> _dealloc_plan;
+ bool _valid = false;
+ IMemoryManager *_memory_manager;
+
+public:
+ explicit TensorAllocPlan(IMemoryManager *memory_manager);
+ void invalidate() { _valid = false; }
+ bool isValid() const { return _valid; }
+ void build(const RuntimeGraph &graph);
+ void allocate(size_t kernel_index) const;
+ void deallocate(size_t kernel_index) const;
+};
+
+RuntimeGraph::TensorAllocPlan::TensorAllocPlan(IMemoryManager *memory_manager)
+ : _memory_manager(memory_manager)
+{
+}
+
+void RuntimeGraph::TensorAllocPlan::build(const RuntimeGraph &graph)
+{
+ invalidate();
+ using Lifetime = std::pair<size_t, size_t>;
+ std::unordered_map<Tensor *, Lifetime> lifetimes;
+ const size_t num_kernels = graph._kernels.size();
+ for (size_t index = 0; index < num_kernels; ++index)
+ {
+ const auto &kernel = graph._kernels[index];
+ for (const Tensor *tensor : kernel->getInputTensors())
+ {
+ auto nc_tensor = const_cast<Tensor *>(tensor);
+ if (lifetimes.count(nc_tensor) > 0)
+ lifetimes.at(nc_tensor).second = index;
+ }
+ for (Tensor *tensor : kernel->getOutputTensors())
+ {
+ assert(lifetimes.count(tensor) == 0);
+ lifetimes[tensor] = Lifetime(index, index);
+ }
+ }
+ for (const Tensor *tensor : graph.getOutputTensors())
+ {
+ auto nc_tensor = const_cast<Tensor *>(tensor);
+ if (lifetimes.count(nc_tensor) > 0)
+ lifetimes.at(nc_tensor).second = num_kernels;
+ }
+ _alloc_plan.assign(num_kernels, std::vector<Tensor *>());
+ _dealloc_plan.assign(num_kernels + 1, std::vector<Tensor *>());
+ for (const auto &item : lifetimes)
+ {
+ _alloc_plan[item.second.first].push_back(item.first);
+ _dealloc_plan[item.second.second].push_back(item.first);
+ }
+ _valid = true;
+}
+
+void RuntimeGraph::TensorAllocPlan::allocate(size_t kernel_index) const
+{
+ assert(_valid && kernel_index < _alloc_plan.size());
+ for (Tensor *tensor : _alloc_plan[kernel_index])
+ {
+ _memory_manager->allocate_memory(*tensor);
+ }
+}
+
+void RuntimeGraph::TensorAllocPlan::deallocate(size_t kernel_index) const
+{
+ assert(_valid && kernel_index < _dealloc_plan.size());
+ for (Tensor *tensor : _dealloc_plan[kernel_index])
+ {
+ _memory_manager->release_memory(*tensor);
+ }
+}
+
+RuntimeGraph::RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager)
+ : _owning_module(owning_module), _memory_manager(memory_manager),
+ _tensor_alloc_plan(std::make_unique<TensorAllocPlan>(memory_manager))
+{
+}
+
+RuntimeGraph::~RuntimeGraph()
+{
+ for (auto &tensor : _tensors)
+ {
+ if (tensor->is_data_allocated())
+ _memory_manager->release_memory(*tensor);
+ }
+}
+
+Tensor *RuntimeGraph::addTensor(std::unique_ptr<Tensor> &&tensor)
+{
+ assert(tensor != nullptr);
+ _tensors.push_back(std::move(tensor));
+ return _tensors.back().get();
+}
+
+void RuntimeGraph::setInputTensors(const std::vector<Tensor *> &input_tensors)
+{
+ assert(std::all_of(input_tensors.cbegin(), input_tensors.cend(),
+ [](Tensor *tensor) { return tensor != nullptr; }));
+ _input_tensors = input_tensors;
+}
+
+void RuntimeGraph::setOutputTensors(const std::vector<Tensor *> &output_tensors)
+{
+ assert(std::all_of(output_tensors.cbegin(), output_tensors.cend(),
+ [](Tensor *tensor) { return tensor != nullptr; }));
+ _output_tensors = output_tensors;
+}
+
+void RuntimeGraph::configureAllocations(Tensor *tensor)
+{
+ _memory_manager->allocate_memory(*tensor);
+}
+
+void RuntimeGraph::addKernel(std::unique_ptr<Kernel> &&kernel)
+{
+ assert(kernel != nullptr);
+ _kernels.push_back(std::move(kernel));
+ _tensor_alloc_plan->invalidate();
+}
+
+void RuntimeGraph::execute() const
+{
+ if (!_tensor_alloc_plan->isValid())
+ _tensor_alloc_plan->build(*this);
+
+ EventNotifier *event_notifier = _owning_module->getEventNotifier();
+
+ // Notify the observers that the input tensors have changed.
+ if (event_notifier != nullptr)
+ {
+ for (const Tensor *input_tensor : getInputTensors())
+ {
+ if (input_tensor->is_observable())
+ event_notifier->postTensorWrite(input_tensor);
+ }
+ }
+
+ for (size_t index = 0; index < _kernels.size(); ++index)
+ {
+ const auto &kernel = _kernels[index];
+ if (event_notifier != nullptr)
+ {
+ event_notifier->preOperatorExecute(kernel.get());
+ }
+
+ // TODO The `configure` method should only be called if the outputs of an operator need to be
+ // resized.
+ kernel->configure();
+
+ // Preallocate outputs in advance instead of relying on automatic allocation
+ _tensor_alloc_plan->allocate(index);
+
+ kernel->execute();
+
+ if (event_notifier != nullptr)
+ {
+ event_notifier->postOperatorExecute(kernel.get());
+ }
+
+ for (const Tensor *tensor : kernel->getOutputTensors())
+ {
+ if (event_notifier != nullptr && tensor->is_observable())
+ {
+ event_notifier->postTensorWrite(tensor);
+ }
+ }
+ _tensor_alloc_plan->deallocate(index);
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.h b/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.h
new file mode 100644
index 000000000..8184e249d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
+#define LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
+
+#include "luci_interpreter/core/Tensor.h"
+#include "luci_interpreter/MemoryManager.h"
+#include "core/Kernel.h"
+
+#include <memory>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+class RuntimeModule;
+
+class RuntimeGraph
+{
+private:
+ class TensorAllocPlan;
+ friend class TensorAllocPlan;
+
+public:
+ explicit RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager);
+ ~RuntimeGraph();
+
+ Tensor *addTensor(std::unique_ptr<Tensor> &&tensor);
+
+ void setInputTensors(const std::vector<Tensor *> &input_tensors);
+ void setOutputTensors(const std::vector<Tensor *> &output_tensors);
+
+ void configureAllocations(Tensor *tensor);
+
+ const std::vector<Tensor *> &getInputTensors() const { return _input_tensors; }
+ const std::vector<Tensor *> &getOutputTensors() const { return _output_tensors; }
+
+ void addKernel(std::unique_ptr<Kernel> &&kernel);
+
+ void execute() const;
+
+private:
+ IMemoryManager *_memory_manager;
+ RuntimeModule *_owning_module;
+ std::vector<std::unique_ptr<Tensor>> _tensors;
+ std::vector<Tensor *> _input_tensors;
+ std::vector<Tensor *> _output_tensors;
+
+ // Kernels in execution order.
+ std::vector<std::unique_ptr<Kernel>> _kernels;
+ // Tensors that are not used anymore after given op
+ std::unique_ptr<TensorAllocPlan> _tensor_alloc_plan;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
diff --git a/compiler/luci-micro/luci-interpreter/src/core/RuntimeModule.h b/compiler/luci-micro/luci-interpreter/src/core/RuntimeModule.h
new file mode 100644
index 000000000..78873b0ec
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/RuntimeModule.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
+#define LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
+
+#include "core/RuntimeGraph.h"
+#include "core/EventNotifier.h"
+#include "luci_interpreter/MemoryManager.h"
+
+#include <memory>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+class RuntimeModule
+{
+public:
+ explicit RuntimeModule(EventNotifier *event_notifier) : _event_notifier(event_notifier) {}
+
+ EventNotifier *getEventNotifier() const { return _event_notifier; }
+
+ RuntimeGraph *addGraph(IMemoryManager *memory_manager)
+ {
+ _graphs.push_back(std::make_unique<RuntimeGraph>(this, memory_manager));
+ return _graphs.back().get();
+ }
+
+ const std::vector<Tensor *> &getInputTensors() const { return getMainGraph()->getInputTensors(); }
+ const std::vector<Tensor *> &getOutputTensors() const
+ {
+ return getMainGraph()->getOutputTensors();
+ }
+
+ void execute() const { getMainGraph()->execute(); }
+
+private:
+ RuntimeGraph *getMainGraph() const { return _graphs[0].get(); }
+
+ EventNotifier *const _event_notifier;
+ std::vector<std::unique_ptr<RuntimeGraph>> _graphs;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/core/Tensor.cpp b/compiler/luci-micro/luci-interpreter/src/core/Tensor.cpp
new file mode 100644
index 000000000..3c3c5ffff
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/Tensor.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/core/Tensor.h"
+
+#include <cstring>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+Tensor::Tensor(DataType element_type, Shape shape, AffineQuantization quantization,
+ std::string name)
+ : _element_type(element_type), _shape(std::move(shape)), _quantization(std::move(quantization)),
+ _name(std::move(name)), _data_allocated(false)
+{
+}
+
+void Tensor::readData(void *data_ptr, size_t data_size) const
+{
+ const size_t element_size = getDataTypeSize(element_type());
+ const int32_t num_elements = shape().num_elements();
+ if (data_size != num_elements * element_size)
+ {
+ throw std::invalid_argument("Invalid data size.");
+ }
+ assert(data_ptr != nullptr);
+ std::memcpy(data_ptr, data<void>(), data_size);
+}
+
+void Tensor::writeData(const void *data_ptr, size_t data_size)
+{
+ const size_t element_size = getDataTypeSize(element_type());
+ const int32_t num_elements = shape().num_elements();
+ if (data_size != num_elements * element_size)
+ {
+ throw std::invalid_argument("Invalid data size.");
+ }
+ assert(data_ptr != nullptr);
+ std::memcpy(data<void>(), data_ptr, data_size);
+}
+
+void Tensor::resize(const Shape &new_shape) { _shape = new_shape; }
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/import/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/import/CMakeLists.txt
new file mode 100644
index 000000000..dd9733f92
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/import/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(SOURCES
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/GraphBuilderRegistry.h"
+ GraphBuilderRegistry.cpp)
+
+# include specific builders
+file(GLOB_RECURSE NODES "Nodes/*")
+list(APPEND SOURCES ${NODES})
+
+add_library(${LUCI_INTERPRETER_IMPORT} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_IMPORT} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+
+target_include_directories(${LUCI_INTERPRETER_IMPORT} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_IMPORT} PUBLIC luci_import)
diff --git a/compiler/luci-micro/luci-interpreter/src/import/GraphBuilderRegistry.cpp b/compiler/luci-micro/luci-interpreter/src/import/GraphBuilderRegistry.cpp
new file mode 100644
index 000000000..a33bca6a4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/import/GraphBuilderRegistry.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci_interpreter/GraphBuilderRegistry.h"
+#include "Nodes/CircleReferencingConst.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying()
+{
+ auto builder = std::make_unique<luci::GraphBuilderRegistry>();
+ {
+ // redefine NodeBuilder of BUFFER type
+ builder->add(std::make_unique<CircleReferencingConstNodeBuilder>());
+ }
+
+ return builder;
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp b/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp
new file mode 100644
index 000000000..14e90f240
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleReferencingConst.h"
+
+#include <vector>
+
+namespace
+{
+
+// helper struct which describes data loaded to custom_options of CircleReferencingConst node
+struct ConstDataReference
+{
+ const uint8_t *data = nullptr;
+ uint32_t size = 0;
+};
+
+} // namespace
+
+namespace luci_interpreter
+{
+using namespace luci;
+
+CircleNode *CircleReferencingConstNodeBuilder::build(TensorIndex tensor_index,
+ GraphBuilderContext *context) const
+{
+ assert(tensor_index >= 0);
+
+ const auto graph = context->graph();
+ const auto reader = context->reader();
+ const auto tensors = reader->tensors();
+ auto const const_tensor = tensors[tensor_index];
+ assert(const_tensor != nullptr);
+ if (const_tensor->is_variable())
+ {
+ // Create CircleVariable for variable
+ return nullptr;
+ }
+
+ auto const buffer = wrap(reader->buffers()[const_tensor->buffer()]->data());
+ auto const const_dims = wrap(const_tensor->shape()); // in NHWC
+ if (const_dims.empty() && buffer.empty())
+ {
+ // unknown shape tensor and scalar tensor
+ return nullptr;
+ }
+
+ // if tensor_index is used as output to some other operator, this is not a constant
+ auto tensoroutputs = context->tensoroutputs();
+ if (tensoroutputs->find(tensor_index))
+ {
+ // other operator output tensor
+ return nullptr;
+ }
+
+ uint32_t num_elements = 1;
+ for (uint32_t r = 0; r < const_dims.size(); ++r)
+ {
+ num_elements = num_elements * const_dims[r];
+ }
+
+ if (buffer.empty() && num_elements > 0)
+ {
+ // normal empty tensor
+ return nullptr;
+ }
+
+ // create CircleReferencingConst
+ auto custom_node = graph->nodes()->create<CircleCustom>(0, 1);
+ {
+ custom_node->custom_code("CircleReferencingConst");
+
+ copy_tensor_attributes(const_tensor, custom_node);
+ custom_node->shape_status(luci::ShapeStatus::VALID);
+
+ // custom options stores size of buffer and pointer's value to buffer's data
+ {
+ std::vector<uint8_t> custom_options(sizeof(ConstDataReference));
+ {
+ auto &const_data_ref = *reinterpret_cast<ConstDataReference *>(custom_options.data());
+ const_data_ref = {buffer.data(), buffer.size()};
+ }
+ custom_node->custom_options(custom_options);
+ }
+ }
+
+ // Output of CircleCustom node presented with CircleConstNode
+ auto out_node = graph->nodes()->create<CircleCustomOut>();
+ {
+ out_node->index(0);
+ out_node->input(custom_node);
+
+ copy_tensor_attributes(const_tensor, out_node);
+ out_node->shape_status(luci::ShapeStatus::VALID);
+ }
+
+ return out_node;
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.h b/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.h
new file mode 100644
index 000000000..ed8f95124
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
+#define __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
+
+#include <luci/Import/NodeBuilder.h>
+
+#include <luci/IR/Nodes/CircleConst.h>
+
+namespace luci_interpreter
+{
+using namespace luci;
+
+/**
+ * @brief Builder creates CircleCustom node with pointer to constants data from Tensor with buffer.
+ */
+class CircleReferencingConstNodeBuilder : public TypedNodeBuilder<NodeBuilderType::BUFFER>
+{
+public:
+ CircleNode *build(TensorIndex tensor_index, GraphBuilderContext *ctx) const final;
+};
+
+} // namespace luci_interpreter
+
+#endif // __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Add.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Add.cpp
new file mode 100644
index 000000000..d7bf3084f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Add.cpp
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Add.h"
+
+#include "kernels/BinaryOpCommon.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/add.h>
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Add::Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams &params)
+ : KernelWithParams<AddParams>({input1, input2}, {output}, params)
+{
+}
+
+void Add::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+ if (input1()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
+ input2()->zero_points().size() == 1);
+ LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
+ output()->zero_point() == 0);
+ }
+
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Add::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Add::evalFloat() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<float>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastAdd4DSlow(
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<float>(input1()),
+ getTensorShape(input2()), getTensorData<float>(input2()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ }
+}
+
+template <typename T> void Add::evalInteger() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<T>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastAdd4DSlow(
+ params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+ getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
+void Add::evalQuantized() const
+{
+ const auto input1_scale = static_cast<double>(input1()->scale());
+ const auto input2_scale = static_cast<double>(input2()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
+ const int left_shift = 20;
+ const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
+ const double real_input1_multiplier = input1_scale / twice_max_input_scale;
+ const double real_input2_multiplier = input2_scale / twice_max_input_scale;
+ const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
+
+ int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
+ int input1_shift{}, input2_shift{}, output_shift{};
+ quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
+ quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
+ quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::ArithmeticParams params{};
+ params.left_shift = left_shift;
+ // The kernel expects inputs' zero points to be negated.
+ params.input1_offset = -input1()->zero_point(); // Note the '-'.
+ params.input1_multiplier = input1_multiplier;
+ params.input1_shift = input1_shift;
+ params.input2_offset = -input2()->zero_point(); // Note the '-'.
+ params.input2_multiplier = input2_multiplier;
+ params.input2_shift = input2_shift;
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = output_multiplier;
+ params.output_shift = output_shift;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastAdd4DSlow(
+ params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+ getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
+ getTensorShape(input2()), getTensorData<uint8_t>(input2()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+}
+
+void Add::evalQuantizedS16() const
+{
+ const auto input1_scale = static_cast<double>(input1()->scale());
+ const auto input2_scale = static_cast<double>(input2()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
+ constexpr int left_shift = 12;
+ const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
+ const double real_input1_multiplier = input1_scale / twice_max_input_scale;
+ const double real_input2_multiplier = input2_scale / twice_max_input_scale;
+ const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
+
+ int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
+ int input1_shift{}, input2_shift{}, output_shift{};
+ quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
+ quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
+ quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ auto fn = [input1_multiplier, input1_shift, //
+ input2_multiplier, input2_shift, //
+ output_multiplier, output_shift, //
+ activation_min, activation_max](int16_t input1_val, int16_t input2_val) {
+ const int32_t shifted_input1_val = static_cast<int32_t>(input1_val) << left_shift;
+ const int32_t shifted_input2_val = static_cast<int32_t>(input2_val) << left_shift;
+ const int32_t scaled_input1_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input1_val, input1_multiplier, input1_shift);
+ const int32_t scaled_input2_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input2_val, input2_multiplier, input2_shift);
+ const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
+ const int32_t raw_output = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ raw_sum, output_multiplier, output_shift);
+ const int32_t clamped_output = std::min(activation_max, std::max(activation_min, raw_output));
+ return static_cast<int16_t>(clamped_output);
+ };
+
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()),
+ getTensorShape(input2()), getTensorData<int16_t>(input2()),
+ getTensorShape(output()), getTensorData<int16_t>(output()), fn);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Add.h b/compiler/luci-micro/luci-interpreter/src/kernels/Add.h
new file mode 100644
index 000000000..91d95b6af
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Add.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ADD_H
+#define LUCI_INTERPRETER_KERNELS_ADD_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Add : public KernelWithParams<AddParams>
+{
+public:
+ Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams &params);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+ void evalQuantizedS16() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ADD_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Add.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Add.test.cpp
new file mode 100644
index 000000000..b8b1c3089
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Add.test.cpp
@@ -0,0 +1,357 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Add.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
// Common fixture: each test gets a fresh TestMemoryManager so tensor
// allocations cannot leak between tests.
class AddTest : public ::testing::Test
{
protected:
  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }

  std::unique_ptr<IMemoryManager> _memory_manager;
};
+
// Comparison tolerance for quantized Add: one quantization step of a
// 256-level (uint8) scale covering [min, max] — the quantized result must
// never be off by more than a single step.
float GetTolerance(float min, float max)
{
  return (max - min) / 255.0;
}
+
+TEST_F(AddTest, Uint8)
+{
+ std::initializer_list<int32_t> base_shape = {2, 3, 1, 2};
+ std::initializer_list<float> base_data = {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ std::initializer_list<int32_t> test_shapes[] = {
+ {1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::initializer_list<float> test_data = {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ std::initializer_list<int32_t> output_shapes[] = {
+ {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+ std::vector<std::vector<float>> output_data = {
+ {-0.1f, 2.6f, -0.7f, 2.8f, 0.7f, 3.0f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
+ 1.0f, -0.8f, 0.4f, -0.6f, 1.8f, -0.2f, 1.4f, 3.0f, 0.8f, 3.0f, 2.2f, 3.0f,
+ -1.4f, 0.3f, -2.0f, 0.5f, -0.6f, 0.9f, 0.9f, -1.9f, 0.3f, -1.7f, 1.7f, -1.3f},
+ {-0.1f, 2.6f, 0.5f, 1.0f, 1.8f, -0.2f, 1.4f, 3.0f, -2.0f, 0.5f, 1.7f, -1.3f},
+ {-0.1f, 2.5f, 0.0f, 2.6f, -0.7f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
+ 1.0f, -0.9f, 1.1f, -0.8f, 0.4f, -1.5f, 1.7f, 3.0f, 2.2f, 3.0f, 2.1f, 3.0f,
+ -1.1f, 0.5f, -0.6f, 1.0f, -0.7f, 0.9f, 1.2f, -1.7f, 1.7f, -1.2f, 1.6f, -1.3f},
+ {-0.1f, 2.5f, 1.2f, 0.8f, 0.4f, -1.5f, 1.7f, 3.0f, -0.6f, 1.0f, 1.6f, -1.3f}};
+ float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f);
+ for (int i = 0; i < output_data.size(); i++)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+
+ AddParams params{};
+ params.activation = Activation::NONE;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data[i], kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
+ }
+ // Re-run with exchanged inputs.
+ for (int i = 0; i < output_data.size(); i++)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+
+ AddParams params{};
+ params.activation = Activation::NONE;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data[i], kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
+ }
+}
+
+TEST_F(AddTest, Float)
+{
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::vector<std::vector<float>> test_outputs = {
+ {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
+ 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f,
+ 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f},
+ {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f},
+ {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
+ 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f,
+ 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f},
+ {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}};
+ std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
+ << "With shape number " << i;
+ }
+ // Re-run with exchanged inputs.
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
+ << "With shape number " << i;
+ }
+}
+
+template <loco::DataType DType> void CheckInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::vector<std::vector<dtype>> test_outputs = {
+ {3, 3, 0, 1, 0, 8, 5, 1, 0, 0, 2, 6, 8, 0, 1, 0, 5, 1,
+ 5, 4, 0, 2, 2, 9, 11, 0, 4, 0, 8, 5, 11, 2, 4, 0, 8, 7},
+ {3, 3, 0, 0, 5, 1, 5, 4, 4, 0, 8, 7},
+ {3, 6, 0, 3, 0, 0, 5, 4, 2, 1, 0, 0, 8, 0, 5, 0, 1, 0,
+ 0, 2, 2, 4, 7, 9, 6, 0, 8, 0, 13, 5, 6, 0, 8, 2, 13, 7},
+ {3, 6, 2, 1, 1, 0, 0, 2, 8, 0, 13, 7}};
+ std::vector<dtype> input1_data{-1, 2, 1, 0, 4, -5, 1, 3, 7, -1, 7, 1};
+ std::vector<dtype> input2_data{4, 1, -3, -1, 1, 6};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+ // Re-run with exchanged inputs.
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+};
+
// Integer Add is exercised through the shared CheckInteger body for both
// supported widths; SUCCEED() marks the test as explicitly passing when the
// helper raises no gtest failures.
TEST_F(AddTest, SInt32)
{
  CheckInteger<loco::DataType::S32>(_memory_manager.get());
  SUCCEED();
}

TEST_F(AddTest, SInt64)
{
  CheckInteger<loco::DataType::S64>(_memory_manager.get());
  SUCCEED();
}
+
// Symmetric int16 quantized Add (zero_point == 0) over the same four
// broadcast patterns, with RELU-clamped float references. The second pass
// exchanges the operands AND uses different input/output scales to exercise
// the rescaling path.
TEST_F(AddTest, SInt16)
{
  Shape base_shape = {2, 3, 1, 2};
  std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
  std::vector<std::vector<int32_t>> ref_output_shapes{
    {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};

  std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
                                 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
  std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
  std::vector<std::vector<float>> ref_outputs = {
    {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
     1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f,
     0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f},
    {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f},
    {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
     1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f,
     0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f},
    {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}};

  for (size_t i = 0; i < test_shapes.size(); ++i)
  {
    Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data,
                                                          _memory_manager.get());
    Tensor input2_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0,
                                                          input2_data, _memory_manager.get());
    Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0);
    // Tolerance is one output quantization step.
    const float tolerance = output_tensor.scale();

    AddParams params{};
    params.activation = Activation::RELU;

    Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
    kernel.configure();
    _memory_manager->allocate_memory(output_tensor);
    kernel.execute();

    EXPECT_THAT(extractTensorShape(output_tensor),
                ::testing::ElementsAreArray(ref_output_shapes[i]))
      << "With shape number " << i;
    EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
      << "With shape number " << i;
  }
  // Re-run with exchanged inputs and different scales.
  for (size_t i = 0; i < test_shapes.size(); ++i)
  {
    Tensor input1_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0,
                                                          input2_data, _memory_manager.get());
    Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data,
                                                          _memory_manager.get());
    Tensor output_tensor = makeOutputTensor(DataType::S16, 5.0 / 32767, 0);
    const float tolerance = output_tensor.scale();

    AddParams params{};
    params.activation = Activation::RELU;

    Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
    kernel.configure();
    _memory_manager->allocate_memory(output_tensor);
    kernel.execute();

    EXPECT_THAT(extractTensorShape(output_tensor),
                ::testing::ElementsAreArray(ref_output_shapes[i]))
      << "With shape number " << i;
    EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
      << "With shape number " << i;
  }
}
+
+TEST_F(AddTest, Input_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
// Negative test: S64 inputs with an S32 output must be rejected at
// configure() time.
TEST_F(AddTest, Invalid_Output_Type_NEG)
{
  Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
  Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S32);

  AddParams params{};
  params.activation = Activation::RELU;

  Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
  EXPECT_ANY_THROW(kernel.configure());
}

// Negative test: U64 passes configure() (types match), but there is no U64
// evaluation path, so execute() must throw.
TEST_F(AddTest, Invalid_Input_Type_NEG)
{
  Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
  Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::U64);

  AddParams params{};
  params.activation = Activation::RELU;

  Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
  kernel.configure();
  _memory_manager->allocate_memory(output_tensor);
  EXPECT_ANY_THROW(kernel.execute());
}

// Negative test: S16 tensors built here without quantization parameters are
// expected to be rejected at configure() time.
TEST_F(AddTest, Invalid_Quantization_NEG)
{
  Tensor input1_tensor = makeInputTensor<DataType::S16>({1}, {1}, _memory_manager.get());
  Tensor input2_tensor = makeInputTensor<DataType::S16>({1}, {2}, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S16);

  AddParams params{};
  params.activation = Activation::NONE;

  Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
  EXPECT_ANY_THROW(kernel.configure());
}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.cpp
new file mode 100644
index 000000000..6561a1783
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ArgMax.h"
+#include "kernels/Utils.h"
+#include "PALArgMax.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Wires {input, axis} as the kernel inputs and {output} as its single output.
ArgMax::ArgMax(const Tensor *input, const Tensor *axis, Tensor *output, const ArgMaxParams &params)
  : KernelWithParams<ArgMaxParams>({input, axis}, {output}, params)
{
}
+
+void ArgMax::configure()
+{
+ assert(axis()->element_type() == DataType::S32 || axis()->element_type() == DataType::S64);
+ assert(input()->shape().num_dims() >= 1);
+ const Shape &input_shape = input()->shape();
+ const int num_dims = input_shape.num_dims();
+ Shape output_shape(num_dims - 1);
+
+ // If axis value is negative, then update by adding input_shape's num_dims.
+ // If updated value also negative, then assert.
+ assert(axis()->shape().num_elements() == 1);
+ int axis_value = getTensorData<int32_t>(axis())[0];
+ if (axis_value < 0)
+ axis_value = axis_value + num_dims;
+ assert(axis_value >= 0);
+
+ int j = 0;
+ for (int i = 0; i < num_dims; i++)
+ {
+ if (i == axis_value)
+ continue;
+ output_shape.dim(j++) = input_shape.dim(i);
+ }
+
+ assert(output()->element_type() == _params.output_type);
+
+ output()->resize(output_shape);
+}
+
// Computes arg-max by delegating to the PAL ArgMinMax routine with
// std::greater as the comparator. The nested switches enumerate every
// supported combination: axis type (S32/S64) x output index type (S32/S64)
// x input type (FLOAT32/U8); anything else throws.
void ArgMax::execute() const
{

#define TF_LITE_ARG_MAX(data_type, axis_type, output_type) \
  luci_interpreter_pal::ArgMinMax(getTensorShape(input()), getTensorData<data_type>(input()), \
                                  getTensorData<axis_type>(axis()), getTensorShape(output()), \
                                  getTensorData<output_type>(output()), std::greater<data_type>())
  // Axis indices stored as 32-bit integers.
  if (axis()->element_type() == DataType::S32)
  {
    switch (_params.output_type)
    {
      case DataType::S32:
        switch (input()->element_type())
        {
          case DataType::FLOAT32:
            TF_LITE_ARG_MAX(float, int32_t, int32_t);
            break;
          case DataType::U8:
            TF_LITE_ARG_MAX(uint8_t, int32_t, int32_t);
            break;
          default:
            throw std::runtime_error("Unsupported input type.");
        }
        break;
      case DataType::S64:
        switch (input()->element_type())
        {
          case DataType::FLOAT32:
            TF_LITE_ARG_MAX(float, int32_t, int64_t);
            break;
          case DataType::U8:
            TF_LITE_ARG_MAX(uint8_t, int32_t, int64_t);
            break;
          default:
            throw std::runtime_error("Unsupported input type.");
        }
        break;
      default:
        throw std::runtime_error("Unsupported output type.");
    }
  }
  // Otherwise the axis is S64 (asserted in configure()).
  else
  {
    switch (_params.output_type)
    {
      case DataType::S32:
        switch (input()->element_type())
        {
          case DataType::FLOAT32:
            TF_LITE_ARG_MAX(float, int64_t, int32_t);
            break;
          case DataType::U8:
            TF_LITE_ARG_MAX(uint8_t, int64_t, int32_t);
            break;
          default:
            throw std::runtime_error("Unsupported input type.");
        }
        break;
      case DataType::S64:
        switch (input()->element_type())
        {
          case DataType::FLOAT32:
            TF_LITE_ARG_MAX(float, int64_t, int64_t);
            break;
          case DataType::U8:
            TF_LITE_ARG_MAX(uint8_t, int64_t, int64_t);
            break;
          default:
            throw std::runtime_error("Unsupported input type.");
        }
        break;
      default:
        throw std::runtime_error("Unsupported output type.");
    }
  }
#undef TF_LITE_ARG_MAX
}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.h b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.h
new file mode 100644
index 000000000..c851b5891
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ARGMAX_H
+#define LUCI_INTERPRETER_KERNELS_ARGMAX_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Arg-max reduction along the single axis given by the 'axis' tensor; the
// output holds indices whose element type is params.output_type.
class ArgMax : public KernelWithParams<ArgMaxParams>
{
public:
  ArgMax(const Tensor *input, const Tensor *axis, Tensor *output, const ArgMaxParams &params);

  // Accessors for the tensors wired in by the constructor.
  const Tensor *input() const { return _inputs[0]; }
  const Tensor *axis() const { return _inputs[1]; }
  Tensor *output() const { return _outputs[0]; }

  // configure() resolves the axis and resizes the output;
  // it must run before execute().
  void configure() override;
  void execute() const override;
};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ARGMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.test.cpp
new file mode 100644
index 000000000..474f4b321
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.test.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ArgMax.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T1, typename T2>
+void Check(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> dimension_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<T1> input_data,
+ std::initializer_list<int32_t> dimension_data, std::initializer_list<T2> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T1>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor dimension_tensor =
+ makeInputTensor<DataType::S32>(dimension_shape, dimension_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(getElementType<T2>());
+
+ ArgMaxParams params{};
+ params.output_type = getElementType<T2>();
+ ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
// Typed fixture: the suite below runs once per input element type listed in
// DataTypes.
template <typename T> class ArgMaxTest : public ::testing::Test
{
};

using DataTypes = ::testing::Types<float, uint8_t>;
TYPED_TEST_SUITE(ArgMaxTest, DataTypes);
+
// Scalar axis tensor (shape {}) selecting the last dimension; each input row
// reduces to the index of its maximum. Both S32 and S64 index outputs are
// checked.
TYPED_TEST(ArgMaxTest, Simple)
{
  Check<TypeParam, int32_t>(/*input_shape=*/{1, 1, 1, 4}, /*dimension_shape=*/{},
                            /*output_shape=*/{1, 1, 1},
                            /*input_data=*/
                            {
                              1, 9, 7, 3, //
                            },
                            /*dimension_data=*/{3}, /*output_data=*/{1});
  Check<TypeParam, int64_t>(/*input_shape=*/{1, 1, 1, 4}, /*dimension_shape=*/{},
                            /*output_shape=*/{1, 1, 1},
                            /*input_data=*/
                            {
                              1, 9, 7, 3, //
                            },
                            /*dimension_data=*/{3}, /*output_data=*/{1});
}

// Same as Simple but with two rows, producing one arg-max index per row.
TYPED_TEST(ArgMaxTest, MultiDimensions)
{
  Check<TypeParam, int32_t>(/*input_shape=*/{1, 1, 2, 4}, /*dimension_shape=*/{},
                            /*output_shape=*/{1, 1, 2},
                            /*input_data=*/
                            {
                              1, 2, 7, 8, //
                              1, 9, 7, 3, //
                            },
                            /*dimension_data=*/{3}, /*output_data=*/{3, 1});
  Check<TypeParam, int64_t>(/*input_shape=*/{1, 1, 2, 4}, /*dimension_shape=*/{},
                            /*output_shape=*/{1, 1, 2},
                            /*input_data=*/
                            {
                              1, 2, 7, 8, //
                              1, 9, 7, 3, //
                            },
                            /*dimension_data=*/{3}, /*output_data=*/{3, 1});
}
+
// Negative test: a U8 index output matches params.output_type, so
// configure() succeeds, but execute() has no U8 output path and must throw.
TEST(ArgMaxTest, UnsupportedType_NEG)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4},
                                                           {
                                                             1, 2, 7, 8, //
                                                             1, 9, 7, 3, //
                                                           },
                                                           memory_manager.get());
  Tensor dimension_tensor = makeInputTensor<DataType::S32>({}, {3}, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::U8);

  ArgMaxParams params{};
  params.output_type = DataType::U8;
  ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params);
  kernel.configure();
  memory_manager->allocate_memory(output_tensor);
  EXPECT_ANY_THROW(kernel.execute());
}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.cpp
new file mode 100644
index 000000000..d3bade9e4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.cpp
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/AveragePool2D.h"
+
+#include "kernels/Utils.h"
+
+#include "PALAveragePool2d.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
// Wires {input} as the kernel input and {output, scratchpad} as its outputs;
// slot 1 is backend scratch memory set up in configure().
AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad,
                             const Pool2DParams &params)
  : KernelWithParams<Pool2DParams>({input}, {output, scratchpad}, params)
{
}
+
+void AveragePool2D::configure()
+{
+ if (input()->element_type() != output()->element_type())
+ {
+ throw std::runtime_error("Input Tensor and Output Tensor Type must be same");
+ }
+ if (input()->shape().num_dims() != 4)
+ {
+ throw std::runtime_error("Input Tensor Shape must be 4-D");
+ }
+ const Shape &input_shape = input()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t depth = input_shape.dim(3);
+
+ const int32_t output_height =
+ computeOutputSize(_params.padding, input_height, _params.filter_height, _params.stride_height);
+ const int32_t output_width =
+ computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
+
+ _padding_height =
+ computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
+ _padding_width =
+ computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
+ if (input()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
+ }
+ else if (input()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+ }
+ else if (input()->element_type() == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
+ }
+ output()->resize({batches, output_height, output_width, depth});
+
+ auto scratchpad = getOutputTensors()[1];
+ luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(),
+ getTensorShape(input()), getTensorShape(output()));
+}
+
+void AveragePool2D::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S16:
+ evalSInt16();
+ break;
+ case DataType::S8:
+ evalSInt8();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
// Float path: forwards stride/filter/padding plus the fused-activation
// clamp range to the TFLite reference AveragePool.
void AveragePool2D::evalFloat() const
{
  float activation_min{};
  float activation_max{};
  calculateActivationRange(_params.activation, &activation_min, &activation_max);

  tflite::PoolParams params{};
  params.padding_values.height = _padding_height; // computed in configure()
  params.padding_values.width = _padding_width;
  params.stride_height = _params.stride_height;
  params.stride_width = _params.stride_width;
  params.filter_height = _params.filter_height;
  params.filter_width = _params.filter_width;
  params.float_activation_min = activation_min;
  params.float_activation_max = activation_max;

  tflite::reference_ops::AveragePool(params, getTensorShape(input()), getTensorData<float>(input()),
                                     getTensorShape(output()), getTensorData<float>(output()));
}
+
// uint8 quantized path: like evalFloat but with the activation range
// expressed in quantized units of the output tensor.
void AveragePool2D::evalQuantized() const
{
  int32_t activation_min{};
  int32_t activation_max{};
  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);

  tflite::PoolParams params{};
  params.padding_values.height = _padding_height;
  params.padding_values.width = _padding_width;
  params.stride_height = _params.stride_height;
  params.stride_width = _params.stride_width;
  params.filter_height = _params.filter_height;
  params.filter_width = _params.filter_width;
  params.quantized_activation_min = activation_min;
  params.quantized_activation_max = activation_max;

  tflite::reference_ops::AveragePool(params, getTensorShape(input()),
                                     getTensorData<uint8_t>(input()), getTensorShape(output()),
                                     getTensorData<uint8_t>(output()));
}
+
// int8 quantized path: uses the PAL AveragePool, which may consume the
// scratchpad tensor prepared in configure() (backend-dependent).
void AveragePool2D::evalSInt8() const
{
  int32_t activation_min{};
  int32_t activation_max{};
  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
  tflite::PoolParams params{};
  params.padding_values.height = _padding_height;
  params.padding_values.width = _padding_width;
  params.stride_height = _params.stride_height;
  params.stride_width = _params.stride_width;
  params.filter_height = _params.filter_height;
  params.filter_width = _params.filter_width;
  params.quantized_activation_min = activation_min;
  params.quantized_activation_max = activation_max;

  // Fetch scratch memory only when the PAL actually allocated some.
  auto scratchpad = getOutputTensors()[1];
  int8_t *scratchpad_data = nullptr;
  if (scratchpad->is_allocatable())
    scratchpad_data = scratchpad->data<int8_t>();

  luci_interpreter_pal::AveragePool<int8_t>(
    params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(output()),
    getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
}
+
// int16 quantized path: delegates to the TFLite reference integer-ops
// AveragePool (symmetric quantization enforced in configure()).
void AveragePool2D::evalSInt16() const
{
  int32_t activation_min{};
  int32_t activation_max{};
  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);

  tflite::PoolParams params{};
  params.padding_values.height = _padding_height;
  params.padding_values.width = _padding_width;
  params.stride_height = _params.stride_height;
  params.stride_width = _params.stride_width;
  params.filter_height = _params.filter_height;
  params.filter_width = _params.filter_width;
  params.quantized_activation_min = activation_min;
  params.quantized_activation_max = activation_max;

  tflite::reference_integer_ops::AveragePool(
    params, getTensorShape(input()), getTensorData<int16_t>(input()), //
    getTensorShape(output()), getTensorData<int16_t>(output()));
}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.h
new file mode 100644
index 000000000..2c8fe16e7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_KERNELS_AVERAGEPOOL2D_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel implementing CircleAveragePool2D. The extra scratchpad tensor
+// (registered as a second output) backs PAL implementations that need
+// temporary storage — see the evalSInt8 path in the .cpp file.
+class AveragePool2D : public KernelWithParams<Pool2DParams>
+{
+public:
+ AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad,
+ const Pool2DParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void evalSInt16() const;
+ void evalSInt8() const;
+
+private:
+ // Padding computed during configure() from params and tensor shapes.
+ int32_t _padding_height{};
+ int32_t _padding_width{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_AVERAGEPOOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp
new file mode 100644
index 000000000..478bfa68e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp
@@ -0,0 +1,283 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/AveragePool2D.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Fixture providing a fresh TestMemoryManager for each test case.
+class AveragePool2DTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Float path: 2x3 filter, strides (1,2), VALID padding; RELU6 clamps negatives.
+TEST_F(AveragePool2DTest, Float)
+{
+ Shape input_shape{1, 3, 5, 1};
+ std::vector<float> input_data{
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 0, 1.5, //
+ 4.5, 6, //
+ };
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1}));
+}
+
+// U8 path: negative averages clamp to 0 under RELU6.
+TEST_F(AveragePool2DTest, Uint8_0)
+{
+ std::vector<float> input_data{
+ 0, -6, 12, 4, //
+ -3, -2, 10, 7, //
+ };
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0.0, 6.0}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1}));
+}
+
+// U8 path with all-positive input; averages pass through the activation.
+TEST_F(AveragePool2DTest, Uint8_1)
+{
+ std::vector<float> input_data{
+ 0, 6, 12, 4, //
+ 3, 2, 10, 7, //
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({2.75, 6.0}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1}));
+}
+
+// S16 symmetric-quantized path; same geometry as the Float test.
+TEST_F(AveragePool2DTest, SInt16)
+{
+ Shape input_shape{1, 3, 5, 1};
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<float> input_data{
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
+ };
+ std::vector<float> ref_output_data{
+ 0, 1.5, //
+ 4.5, 6, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+ Tensor scratchpad(DataType::S16, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+// S8 path — exercises the scratchpad-backed PAL kernel.
+TEST_F(AveragePool2DTest, SInt8)
+{
+ Shape input_shape{1, 4, 5, 1};
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<float> input_data{-7, -3, 0, 2, -5, 12, -15, 3, 10, 5,
+ 7, -6, -1, 9, -2, 0, -5, 11, -1, -7};
+ std::vector<float> ref_output_data{
+ 0, 2.5, //
+ 1, 1.5, //
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<int8_t>(-15.9375f, 15.9375f);
+ Tensor input_tensor = makeInputTensor<DataType::S8>(
+ input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second);
+ Tensor scratchpad(DataType::S8, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+// Rank-3 input (pooling requires rank 4) must be rejected at configure().
+TEST_F(AveragePool2DTest, Invalid_Input_Shape_NEG)
+{
+ Shape input_shape{1, 3, 5};
+ std::vector<float> input_data{
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Input/output element-type mismatch must be rejected at configure().
+TEST_F(AveragePool2DTest, In_Out_Type_NEG)
+{
+ Shape input_shape{1, 3, 5, 1};
+ std::vector<float> input_data{
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// U8 average pooling requires identical input/output quantization params.
+TEST_F(AveragePool2DTest, Quant_Param_NEG)
+{
+ std::vector<float> input_data{
+ 0, -6, 12, 4, //
+ -3, -2, 10, 7, //
+ };
+
+ std::pair<float, int32_t> quant_param1 = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
+ std::pair<float, int32_t> quant_param2 = quantizationParams<uint8_t>(-7.875f, 7.875f);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param1.first, quant_param1.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param2.first, quant_param2.second);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.cpp
new file mode 100644
index 000000000..24ca22996
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.cpp
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchMatMul.h"
+#include "kernels/Utils.h"
+
+#include "PALBatchMatMul.h"
+
+#include <tensorflow/lite/kernels/internal/reference/transpose.h>
+
+#include <stdexcept>
+
+namespace
+{
+
+// Returns a copy of `shape` with its last two dimensions exchanged;
+// leading (batch) dimensions are left untouched.
+tflite::RuntimeShape SwapRowColumnDims(const tflite::RuntimeShape &shape)
+{
+ tflite::RuntimeShape swapped_shape(shape);
+ const int32_t dims = shape.DimensionsCount();
+ swapped_shape.SetDim(dims - 2, shape.Dims(dims - 1));
+ swapped_shape.SetDim(dims - 1, shape.Dims(dims - 2));
+ return swapped_shape;
+}
+
+} // namespace
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// x_tmp/y_tmp are scratchpad tensors, registered as extra outputs so the
+// memory planner allocates them; execute() stores transposed operand
+// copies there when the adjoint flags require it.
+BatchMatMul::BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp,
+ Tensor *y_tmp, const BatchMatMulParams &params)
+ : KernelWithParams({x, y}, {output, x_tmp, y_tmp}, params)
+{
+}
+
+// Validates operand types/ranks (float32, rank 2..4), prepares the PAL
+// transpose scratchpads, checks batch-dimension broadcasting and the
+// shared accumulation dimension, then resizes the output tensor.
+void BatchMatMul::configure()
+{
+ auto lhs = x();
+ auto rhs = y();
+ auto adj_x = params().adj_x;
+ auto adj_y = params().adj_y;
+
+ // TODO Support non-float types
+ if (lhs->element_type() != DataType::FLOAT32 || rhs->element_type() != DataType::FLOAT32)
+ throw std::runtime_error("Unsupported type.");
+
+ LUCI_INTERPRETER_CHECK(lhs->element_type() == rhs->element_type());
+
+ auto lhs_rank = lhs->shape().num_dims();
+ auto rhs_rank = rhs->shape().num_dims();
+ LUCI_INTERPRETER_CHECK(lhs_rank >= 2 && lhs_rank <= 4);
+ LUCI_INTERPRETER_CHECK(rhs_rank >= 2 && rhs_rank <= 4);
+
+ auto lhs_scratchpad = temp_lhs();
+ auto rhs_scratchpad = temp_rhs();
+ luci_interpreter_pal::SetupScratchpadTensor(lhs_scratchpad, rhs_scratchpad, getTensorShape(lhs),
+ getTensorShape(rhs));
+
+ auto output_rank = std::max(lhs_rank, rhs_rank);
+
+ // Left-pad both shapes with 1s to a common rank for broadcasting checks.
+ auto extended_lhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(lhs));
+ auto extended_rhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(rhs));
+
+ // Ensure any batch dimensions obey broadcasting rules.
+ for (int i = 0; i < output_rank - 2; ++i)
+ {
+ const int lhs_dim = extended_lhs_shape.Dims(i);
+ const int rhs_dim = extended_rhs_shape.Dims(i);
+ if (lhs_dim != rhs_dim)
+ {
+ if (lhs_dim != 1)
+ {
+ LUCI_INTERPRETER_CHECK(rhs_dim == 1);
+ }
+ }
+ }
+
+ // Ensure other dimensions work for matrix multiplication.
+ int accum_dim_lhs =
+ adj_x ? extended_lhs_shape.Dims(output_rank - 2) : extended_lhs_shape.Dims(output_rank - 1);
+ int accum_dim_rhs =
+ adj_y ? extended_rhs_shape.Dims(output_rank - 1) : extended_rhs_shape.Dims(output_rank - 2);
+ LUCI_INTERPRETER_CHECK(accum_dim_lhs == accum_dim_rhs);
+
+ Shape output_shape(output_rank);
+ // Fill in any broadcast dimensions.
+ for (int i = 0; i < output_rank - 2; ++i)
+ {
+ const int lhs_dim = extended_lhs_shape.Dims(i);
+ const int rhs_dim = extended_rhs_shape.Dims(i);
+ int broadcast_dim = lhs_dim;
+ if ((lhs_dim != rhs_dim) && (lhs_dim == 1))
+ {
+ broadcast_dim = rhs_dim;
+ }
+ output_shape.dim(i) = broadcast_dim;
+ }
+ // Fill in the matmul dimensions.
+ int lhs_rows_index = adj_x ? output_rank - 1 : output_rank - 2;
+ int rhs_cols_index = adj_y ? output_rank - 2 : output_rank - 1;
+
+ output_shape.dim(output_rank - 2) = extended_lhs_shape.Dims(lhs_rows_index);
+ output_shape.dim(output_rank - 1) = extended_rhs_shape.Dims(rhs_cols_index);
+
+ output()->resize(output_shape);
+}
+
+// Transposes the last two dimensions of `tensor_in` into `tensor_out`,
+// keeping any leading batch dimensions as-is. Only FLOAT32 is supported;
+// other types throw.
+void TransposeRowsColumns(const Tensor *tensor_in, Tensor *tensor_out)
+{
+ tflite::RuntimeShape transposed_shape(getTensorShape(tensor_in));
+ tflite::RuntimeShape shape(getTensorShape(tensor_in));
+ tflite::TransposeParams params;
+ int rank = shape.DimensionsCount();
+ params.perm_count = rank;
+ for (int i = 0; i < rank - 2; ++i)
+ {
+ params.perm[i] = i;
+ }
+ // Transpose the last two dimensions.
+ params.perm[rank - 2] = rank - 1;
+ params.perm[rank - 1] = rank - 2;
+ transposed_shape.SetDim(rank - 1, shape.Dims(rank - 2));
+ transposed_shape.SetDim(rank - 2, shape.Dims(rank - 1));
+ switch (tensor_in->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::reference_ops::Transpose(params, shape, getTensorData<float>(tensor_in),
+ transposed_shape, getTensorData<float>(tensor_out));
+ break;
+ default:
+ // Fixed typo in the user-facing error message ("suppport" -> "support").
+ throw std::runtime_error("Only support fp32 BatchMatMul for now.");
+ }
+}
+
+// Runs the batched matrix multiply for float32 operands.
+void BatchMatMul::execute() const
+{
+ auto lhs = x();
+ auto rhs = y();
+
+ bool adj_x = params().adj_x;
+ bool adj_y = params().adj_y;
+
+ auto orig_lhs_shape = getTensorShape(lhs);
+ auto orig_rhs_shape = getTensorShape(rhs);
+
+ // NOTE(review): the PAL kernel takes RHS first and appears to expect
+ // operands pre-transposed in the opposite sense of adj_x/adj_y (the
+ // non-adjoint operand is transposed into the scratchpad and its shape
+ // row/column-swapped) — mirrors TFLite's BatchMatMul convention; confirm
+ // against PALBatchMatMul.
+ auto rhs_tensor = adj_y ? rhs : temp_rhs();
+ auto lhs_tensor = adj_x ? temp_lhs() : lhs;
+ if (not adj_y)
+ {
+ TransposeRowsColumns(rhs, temp_rhs());
+ }
+ if (adj_x)
+ {
+ TransposeRowsColumns(lhs, temp_lhs());
+ }
+ tflite::RuntimeShape rhs_shape = adj_y ? orig_rhs_shape : SwapRowColumnDims(orig_rhs_shape);
+ tflite::RuntimeShape lhs_shape = adj_x ? orig_lhs_shape : SwapRowColumnDims(orig_lhs_shape);
+
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::BatchMatMul(rhs_shape, getTensorData<float>(rhs_tensor), lhs_shape,
+ getTensorData<float>(lhs_tensor), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.h b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.h
new file mode 100644
index 000000000..744f49795
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
+#define LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel implementing CircleBatchMatMul for float32 tensors of rank 2..4.
+// The two extra "output" tensors are scratchpads used by execute() to hold
+// transposed copies of the operands.
+class BatchMatMul : public KernelWithParams<BatchMatMulParams>
+{
+public:
+ BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp, Tensor *y_tmp,
+ const BatchMatMulParams &params);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ // Scratchpad accessors (allocated by the memory planner as outputs 1/2).
+ Tensor *temp_lhs() const { return _outputs[1]; }
+ Tensor *temp_rhs() const { return _outputs[2]; }
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp
new file mode 100644
index 000000000..edfa3a685
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchMatMul.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Fixture providing a fresh TestMemoryManager for each test case.
+class BatchMatMulTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// (1,2,3) x (1,3,4) -> (1,2,4), no adjoints.
+TEST_F(BatchMatMulTest, Float)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+// RHS given pre-transposed as (1,4,3) with adj_y=true; same result as Float.
+TEST_F(BatchMatMulTest, Float_SimpleRHSAdjoint)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6};
+ std::vector<float> rhs_data = {7, 11, 15, 8, 12, 16, 9, 13, 17, 10, 14, 18};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 4, 3}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = true;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+// LHS given pre-transposed as (1,3,2) with adj_x=true; same result as Float.
+TEST_F(BatchMatMulTest, Float_SimpleLHSAdjoint)
+{
+ std::vector<float> lhs_data = {1, 4, 2, 5, 3, 6};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 2}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = true;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+// Two independent batches multiplied in one call.
+TEST_F(BatchMatMulTest, Float_BatchSizeTwo)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 2, 3}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218., 560., 584., 608., 632.,
+ 767., 800., 833., 866.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 4}));
+}
+
+// Batch-dim broadcasting: (2,1,6) x (1,6,4) -> (2,1,4).
+TEST_F(BatchMatMulTest, Float_DiffBatch)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 1, 6}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 6, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({427., 448., 469., 490., 1039., 1096., 1153., 1210.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 4}));
+}
+
+// Incompatible accumulation dims (2 vs 3) must be rejected at configure().
+TEST_F(BatchMatMulTest, Invalid_Shape_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 2}, {1, 2, 3, 4}, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Non-broadcastable batch dims (2 vs 3) must be rejected at configure().
+TEST_F(BatchMatMulTest, Invalid_Batch_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 1, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get());
+ Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({3, 3, 1}, {5, 6, 7, 8, 9, 10, 11, 12, 13},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Rank-1 LHS (below minimum rank 2) must be rejected at configure().
+TEST_F(BatchMatMulTest, Invalid_Rank_NEG)
+{
+ Tensor lhs_tensor = makeInputTensor<DataType::FLOAT32>({4}, {1, 2, 3, 4}, _memory_manager.get());
+ Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Rank-5 LHS (above maximum rank 4) must be rejected at configure().
+TEST_F(BatchMatMulTest, Invalid_Rank2_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 1, 4}, {1, 2, 3, 4}, _memory_manager.get());
+ Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Mismatched operand element types (U8 vs FLOAT32) must be rejected.
+TEST_F(BatchMatMulTest, TypeMisMatch_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::U8, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp
new file mode 100644
index 000000000..bd315ff7b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchToSpaceND.h"
+#include "kernels/Utils.h"
+
+#include "PALBatchToSpaceND.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+namespace
+{
+const int kInputMinDimensionNum = 3;
+const int kInputMaxDimensionNum = 4;
+} // namespace
+
+// Constructs the kernel with inputs {data, block_shape, crops} and one output.
+// No validation happens here; all checks are deferred to configure().
+BatchToSpaceND::BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops,
+ Tensor *output)
+ : Kernel({input, block_shape, crops}, {output})
+{
+}
+
+// Validates operand shapes and computes the output shape.
+// NOTE(review): block_shape/crops data pointers are dereferenced here, so both
+// must be constant tensors with storage available at configure() time — confirm
+// against the runtime's kernel-configuration contract.
+void BatchToSpaceND::configure()
+{
+
+ const auto *block_shape_data = block_shape()->data<int32_t>();
+ const auto *crops_data = crops()->data<int32_t>();
+ // Only 3D and 4D inputs are supported (kInputMinDimensionNum = 3,
+ // kInputMaxDimensionNum = 4), and input/output element types must match.
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ // Dimensions other than the leading batch and trailing channel are spatial.
+ int spatial_dims_num = input()->shape().num_dims() - 2;
+
+ // block_shape is a 1-D vector with one entry per spatial dimension.
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);
+
+ // crops is a [spatial_dims_num x 2] matrix of non-negative (begin, end) crops.
+ LUCI_INTERPRETER_CHECK(crops()->shape().num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(crops()->shape().dim(0) == spatial_dims_num);
+ LUCI_INTERPRETER_CHECK(crops()->shape().dim(1) == 2);
+ for (int i = 0; i < spatial_dims_num * 2; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(crops_data[i] >= 0);
+ }
+
+ // The input batch must divide evenly by the product of block_shape entries;
+ // each spatial dim is expanded by its block factor and then cropped.
+ Shape output_shape = Shape(input()->shape().num_dims());
+ int output_batch_size = input()->shape().dim(0);
+ for (int i = 0; i < spatial_dims_num; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(output_batch_size % block_shape_data[i] == 0);
+ output_batch_size = output_batch_size / block_shape_data[i];
+ output_shape.dim(i + 1) =
+ input()->shape().dim(i + 1) * block_shape_data[i] - crops_data[i * 2] - crops_data[i * 2 + 1];
+ }
+
+ // Batch shrinks by the block product; the channel dimension is unchanged.
+ output_shape.dim(0) = output_batch_size;
+ output_shape.dim(input()->shape().num_dims() - 1) =
+ input()->shape().dim(input()->shape().num_dims() - 1);
+ output()->resize(output_shape);
+}
+
+// Dispatches to the platform abstraction layer (PAL) implementation by element
+// type. Only FLOAT32 and U8 are supported; anything else throws.
+void BatchToSpaceND::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::BatchToSpaceND(
+ getTensorShape(input()), getTensorData<float>(input()), getTensorShape(block_shape()),
+ getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
+ getTensorData<int32_t>(crops()), getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::BatchToSpaceND(
+ getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(block_shape()),
+ getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
+ getTensorData<int32_t>(crops()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.h b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.h
new file mode 100644
index 000000000..57703ea5d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel implementing the BATCH_TO_SPACE_ND operation: moves data from the
+// batch dimension back into the spatial dimensions, with optional cropping.
+// Inputs: data tensor, 1-D block_shape (S32), [N x 2] crops (S32).
+class BatchToSpaceND : public Kernel
+{
+public:
+ BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops,
+ Tensor *output);
+
+ // Accessors map onto the fixed input/output slots set by the constructor.
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *block_shape() const { return _inputs[1]; }
+ const Tensor *crops() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp
new file mode 100644
index 000000000..52647a763
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchToSpaceND.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Test helper: builds the three input tensors, runs configure/execute on the
+// BatchToSpaceND kernel and compares output data and shape with expectations.
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> block_shape_shape,
+ std::initializer_list<int32_t> crops_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T> input_data, std::initializer_list<int32_t> block_shape_data,
+ std::initializer_list<int32_t> crops_data, std::initializer_list<T> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor block_shape_tensor =
+ makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get());
+ Tensor crops_tensor =
+ makeInputTensor<DataType::S32>(crops_shape, crops_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+ kernel.configure();
+ // Output memory is allocated only after configure() has resized the tensor.
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+// Typed fixture so the Simple test below runs once per supported element type.
+template <typename T> class BatchToSpaceNDTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(BatchToSpaceNDTest, DataTypes);
+
+// Positive case: 4 batches of 2x2 recombined into a single 4x4 image
+// (block_shape {2, 2}, no cropping).
+TYPED_TEST(BatchToSpaceNDTest, Simple)
+{
+ Check<TypeParam>(/*input_shape=*/{4, 2, 2, 1}, /*block_shape_shape=*/{2}, /*crops_shape=*/{2, 2},
+ /*output_shape=*/{1, 4, 4, 1},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*block_shape_data=*/{2, 2}, /*crops_data=*/{0, 0, 0, 0},
+ /*output_data=*/{1, 5, 2, 6, 9, 13, 10, 14, 3, 7, 4, 8, 11, 15, 12, 16});
+}
+
+// Negative case: batch size 3 is not divisible by the block product (2*2 = 4),
+// so configure() must throw.
+TEST(BatchToSpaceNDTest, Invalid_Shape_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {3, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, memory_manager.get());
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get());
+ Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative case: crops contain a negative entry (-1); configure() must reject
+// negative crop values.
+TEST(BatchToSpaceNDTest, Invalid_Crops_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {4, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, memory_manager.get());
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get());
+ Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, -1, 0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BinaryOpCommon.h b/compiler/luci-micro/luci-interpreter/src/kernels/BinaryOpCommon.h
new file mode 100644
index 000000000..2d2842a9e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BinaryOpCommon.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H
+#define LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H
+
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Derived from tensorflow/lite/kernels/internal/reference/maximum_minimum.h (v2.3.0).
+// Derived from tensorflow/lite/kernels/internal/reference/maximum_minimum.h (v2.3.0).
+// Applies the element-wise binary op `op` over two inputs, broadcasting per
+// NumPy-style rules up to N (default 5) dimensions. When the shapes are
+// identical the broadcast machinery is skipped and a flat loop is used.
+template <typename T, typename Op, int N = 5>
+void BinaryOpBroadcastSlow(const tflite::RuntimeShape &unextended_input1_shape,
+ const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape,
+ const T *input2_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data,
+ Op op)
+{
+ if (unextended_input1_shape == unextended_input2_shape)
+ {
+ // Fast path: shapes match exactly, so every index maps one-to-one.
+ const int flat_size = tflite::MatchingElementsSize(
+ unextended_input1_shape, unextended_input2_shape, unextended_output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ output_data[i] = op(input1_data[i], input2_data[i]);
+ }
+ }
+ else
+ {
+ // Slow path: ranks must fit in the fixed-size N-dimensional descriptors.
+ assert(unextended_input1_shape.DimensionsCount() <= N);
+ assert(unextended_input2_shape.DimensionsCount() <= N);
+ assert(unextended_output_shape.DimensionsCount() <= N);
+
+ tflite::NdArrayDesc<N> desc1{};
+ tflite::NdArrayDesc<N> desc2{};
+ tflite::NdArrayDesc<N> output_desc{};
+ // Build stride descriptors that realize the broadcast for both inputs.
+ tflite::NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape,
+ &desc1, &desc2);
+ tflite::CopyDimsToDesc(tflite::RuntimeShape::ExtendedShape(N, unextended_output_shape),
+ &output_desc);
+
+ // Visit every output coordinate; each input index is recomputed through its
+ // (possibly zero-stride) descriptor to realize broadcasting.
+ auto fn = [&](int indexes[N]) {
+ output_data[SubscriptToIndex(output_desc, indexes)] =
+ op(input1_data[SubscriptToIndex(desc1, indexes)],
+ input2_data[SubscriptToIndex(desc2, indexes)]);
+ };
+ tflite::NDOpsHelper<N>(output_desc, fn);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/kernels/CMakeLists.txt
new file mode 100644
index 000000000..9f4ba0e0b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/CMakeLists.txt
@@ -0,0 +1,43 @@
+# Base sources for the kernels library: shared helpers plus the test memory
+# managers (compiled in so kernels can be exercised by unit tests).
+set(SOURCES
+ BinaryOpCommon.h
+ Utils.h
+ Utils.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/TestMemoryManager.h"
+ ${LUCI_INTERPRETER_SOURCE_DIR}/TestMemoryManager.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h"
+ ${LUCI_INTERPRETER_SOURCE_DIR}/SimpleMemoryManager.cpp)
+
+# First pass: REGISTER_KERNEL collects <Kernel>.h/.cpp for every kernel listed
+# in ${KERNEL_REGISTER_FILE}.
+macro(REGISTER_KERNEL NODE)
+ list(APPEND SOURCES "${NODE}.h")
+ list(APPEND SOURCES "${NODE}.cpp")
+endmacro(REGISTER_KERNEL)
+
+include(${KERNEL_REGISTER_FILE})
+
+add_library(${LUCI_INTERPRETER_KERNELS} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_KERNELS} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+target_include_directories(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR})
+
+target_link_libraries(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_CORE})
+target_link_libraries(${LUCI_INTERPRETER_KERNELS} PRIVATE nncc_common)
+
+add_pal_to_target(${LUCI_INTERPRETER_KERNELS})
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+# Second pass: REGISTER_KERNEL is intentionally redefined so that re-including
+# the same registration file now collects <Kernel>.test.cpp files instead.
+macro(REGISTER_KERNEL NODE)
+ list(APPEND TEST_SOURCES "${NODE}.test.cpp")
+endmacro(REGISTER_KERNEL)
+
+include(${KERNEL_REGISTER_FILE})
+
+list(APPEND TEST_SOURCES TestUtils.h TestUtils.cpp)
+
+GTest_AddTest(${LUCI_INTERPRETER_KERNELS}_test ${TEST_SOURCES})
+target_link_libraries(${LUCI_INTERPRETER_KERNELS}_test ${LUCI_INTERPRETER_KERNELS})
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Cast.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.cpp
new file mode 100644
index 000000000..39ee725dc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.cpp
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Cast.h"
+#include "kernels/Utils.h"
+
+#include <algorithm> // std::transform (used by cast_data)
+#include <stdexcept> // std::runtime_error (thrown for unsupported types)
+
+namespace
+{
+
+using namespace luci_interpreter;
+using namespace luci_interpreter::kernels;
+
+// Element-wise cast of a contiguous buffer from InT to OutT via static_cast.
+// NOTE(review): relies on std::transform, which needs <algorithm>; this file
+// only includes kernels/Cast.h and kernels/Utils.h directly — confirm the
+// header is pulled in transitively or add the include.
+template <typename InT, typename OutT>
+void cast_data(const InT *in_data, OutT *out_data, uint32_t elements_count)
+{
+ std::transform(in_data, in_data + elements_count, out_data,
+ [](InT a) { return static_cast<OutT>(a); });
+}
+
+// Second half of the double dispatch: the input's element type is already
+// resolved to InT; switch on the output tensor's element type and cast.
+// Throws for output types outside the supported set.
+template <typename InT> void cast_from_pointer_to_tensor(const InT *in_data, Tensor *out_tensor)
+{
+ auto const out_type = out_tensor->element_type();
+ auto const elements_count = out_tensor->shape().num_elements();
+
+ switch (out_type)
+ {
+ case loco::DataType::U8:
+ cast_data(in_data, getTensorData<uint8_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::U16:
+ cast_data(in_data, getTensorData<uint16_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::U32:
+ cast_data(in_data, getTensorData<uint32_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::U64:
+ cast_data(in_data, getTensorData<uint64_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::S8:
+ cast_data(in_data, getTensorData<int8_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::S16:
+ cast_data(in_data, getTensorData<int16_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::S32:
+ cast_data(in_data, getTensorData<int32_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::S64:
+ cast_data(in_data, getTensorData<int64_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::FLOAT32:
+ cast_data(in_data, getTensorData<float>(out_tensor), elements_count);
+ break;
+ case loco::DataType::BOOL:
+ cast_data(in_data, getTensorData<bool>(out_tensor), elements_count);
+ break;
+ default:
+ throw std::runtime_error("Unsupported output type.");
+ }
+}
+
+// First half of the double dispatch: switch on the input tensor's element type
+// to obtain a typed pointer, then delegate to cast_from_pointer_to_tensor to
+// resolve the output type. Throws for unsupported input types.
+void cast_from_tensor_to_tensor(const Tensor *in_tensor, Tensor *out_tensor)
+{
+ auto in_type = in_tensor->element_type();
+
+ switch (in_type)
+ {
+ case loco::DataType::U8:
+ cast_from_pointer_to_tensor(getTensorData<uint8_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::U16:
+ cast_from_pointer_to_tensor(getTensorData<uint16_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::U32:
+ cast_from_pointer_to_tensor(getTensorData<uint32_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::U64:
+ cast_from_pointer_to_tensor(getTensorData<uint64_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::S8:
+ cast_from_pointer_to_tensor(getTensorData<int8_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::S16:
+ cast_from_pointer_to_tensor(getTensorData<int16_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::S32:
+ cast_from_pointer_to_tensor(getTensorData<int32_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::S64:
+ cast_from_pointer_to_tensor(getTensorData<int64_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::FLOAT32:
+ cast_from_pointer_to_tensor(getTensorData<float>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::BOOL:
+ cast_from_pointer_to_tensor(getTensorData<bool>(in_tensor), out_tensor);
+ break;
+ default:
+ throw std::runtime_error("Unsupported input type.");
+ }
+}
+
+} // namespace
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Single-input, single-output kernel; no parameters beyond the tensor pair.
+Cast::Cast(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+// Rejects tensors with an Unknown element type and makes the output the same
+// shape as the input (element-type support is fully checked in execute()).
+void Cast::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() != loco::DataType::Unknown);
+ LUCI_INTERPRETER_CHECK(output()->element_type() != loco::DataType::Unknown);
+
+ const Shape &shape = input()->shape();
+ output()->resize(shape);
+}
+
+// Performs the cast; the element counts must agree because configure() copied
+// the input shape to the output.
+void Cast::execute() const
+{
+ assert(input()->shape().num_elements() == output()->shape().num_elements());
+
+ cast_from_tensor_to_tensor(input(), output());
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Cast.h b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.h
new file mode 100644
index 000000000..f0bd02037
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_CAST_H
+#define LUCI_INTERPRETER_KERNELS_CAST_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel implementing the CAST operation: converts a tensor's elements to the
+// output tensor's element type, preserving the shape.
+class Cast : public Kernel
+{
+public:
+ Cast(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_CAST_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Cast.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.test.cpp
new file mode 100644
index 000000000..4713ad34c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.test.cpp
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Cast.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Test helper: casts input_data (type T1) to T2 through the Cast kernel and
+// verifies the resulting data and shape.
+template <typename T1, typename T2>
+void Check(std::initializer_list<int32_t> shape, std::initializer_list<T1> input_data,
+ std::initializer_list<T2> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType input_type = getElementType<T1>();
+ constexpr DataType output_type = getElementType<T2>();
+
+ Tensor input_tensor = makeInputTensor<input_type>(shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(output_type);
+
+ Cast kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ // Output storage exists only after configure() has resized the tensor.
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), shape);
+}
+
+// Variant of Check for BOOL input: the bool initializer list is first copied
+// into a vector of the BOOL tensor's backing storage type
+// (DataTypeImpl<BOOL>::Type) before being handed to makeInputTensor.
+template <typename T>
+void CheckBoolTo(std::initializer_list<int32_t> shape, std::initializer_list<bool> input_data,
+ std::initializer_list<T> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType input_type = loco::DataType::BOOL;
+ constexpr DataType output_type = getElementType<T>();
+ std::vector<typename DataTypeImpl<input_type>::Type> input_data_converted;
+ for (auto elem : input_data)
+ {
+ input_data_converted.push_back(elem);
+ }
+
+ Tensor input_tensor =
+ makeInputTensor<input_type>(shape, input_data_converted, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(output_type);
+
+ Cast kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), shape);
+}
+
+// Typed fixture: the TYPED_TEST cases below run once per integer type listed
+// in IntDataTypes.
+template <typename T> class CastTest : public ::testing::Test
+{
+};
+
+using IntDataTypes =
+ ::testing::Types<uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t>;
+TYPED_TEST_SUITE(CastTest, IntDataTypes);
+
+// Float -> integer TypeParam: values chosen to be exactly representable so the
+// expected integers are unambiguous.
+TYPED_TEST(CastTest, FloatToInt)
+{
+ Check<float, TypeParam>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1.0f, 9.0f, 7.0f, 3.0f, //
+ },
+ /*output_data=*/
+ {
+ 1, 9, 7, 3, //
+ });
+ SUCCEED();
+}
+
+// Integer TypeParam -> float.
+TYPED_TEST(CastTest, IntToFloat)
+{
+ Check<TypeParam, float>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1, 9, 7, 3, //
+ },
+ /*output_data=*/
+ {
+ 1.0f, 9.0f, 7.0f, 3.0f, //
+ });
+ SUCCEED();
+}
+
+// Helper shared by IntToInt: casts a small value set from T1 to T2; the values
+// fit in every integer type used, so no truncation occurs.
+template <typename T1, typename T2> void check_int()
+{
+ Check<T1, T2>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1, 9, 7, 3, //
+ },
+ /*output_data=*/
+ {
+ 1, 9, 7, 3, //
+ });
+ SUCCEED();
+}
+
+// Integer TypeParam -> every other supported integer type.
+TYPED_TEST(CastTest, IntToInt)
+{
+ check_int<TypeParam, uint8_t>();
+ check_int<TypeParam, uint16_t>();
+ check_int<TypeParam, uint32_t>();
+ check_int<TypeParam, uint64_t>();
+ check_int<TypeParam, int8_t>();
+ check_int<TypeParam, int16_t>();
+ check_int<TypeParam, int32_t>();
+ check_int<TypeParam, int64_t>();
+ SUCCEED();
+}
+
+// Integer TypeParam -> bool: non-zero maps to true, zero maps to false.
+TYPED_TEST(CastTest, IntToBool)
+{
+ Check<TypeParam, bool>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1, 0, 7, 0, //
+ },
+ /*output_data=*/
+ {
+ true, false, true, false, //
+ });
+ SUCCEED();
+}
+
+// bool -> integer TypeParam: true maps to 1, false to 0.
+TYPED_TEST(CastTest, BoolToInt)
+{
+ CheckBoolTo<TypeParam>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ true, false, false, true, //
+ },
+ /*output_data=*/
+ {
+ 1, 0, 0, 1, //
+ });
+ SUCCEED();
+}
+
+// float -> bool: non-zero maps to true, zero maps to false.
+TEST(CastTest, FloatToBool)
+{
+ Check<float, bool>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1.0f, 0.0f, 7.0f, 0.0f, //
+ },
+ /*output_data=*/
+ {
+ true, false, true, false, //
+ });
+ SUCCEED();
+}
+
+// bool -> float: true maps to 1.0f, false to 0.0f.
+TEST(CastTest, BoolToFloat)
+{
+ CheckBoolTo<float>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ true, false, false, true, //
+ },
+ /*output_data=*/
+ {
+ 1.0f, 0.0f, 0.0f, 1.0f, //
+ });
+ SUCCEED();
+}
+
+// Identity cast float -> float leaves values untouched.
+TEST(CastTest, FloatToFloat)
+{
+ Check<float, float>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1.0f, 0.0f, 7.0f, 0.0f, //
+ },
+ /*output_data=*/
+ {
+ 1.0f, 0.0f, 7.0f, 0.0f, //
+ });
+ SUCCEED();
+}
+
+// Identity cast bool -> bool leaves values untouched.
+TEST(CastTest, BoolToBool)
+{
+ CheckBoolTo<bool>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ true, true, false, false, //
+ },
+ /*output_data=*/
+ {
+ true, true, false, false, //
+ });
+ SUCCEED();
+}
+
+// Negative case: an output tensor of DataType::Unknown must be rejected by
+// Cast::configure().
+TEST(CastTest, UnsupportedType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4},
+ {
+ 1, 2, 7, 8, //
+ 1, 9, 7, 3, //
+ },
+ memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::Unknown);
+
+ Cast kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+ SUCCEED();
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.cpp
new file mode 100644
index 000000000..46ee5941e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Concatenation.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/concatenation.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Variadic-input kernel: concatenates all `inputs` along params.axis into one
+// output tensor. Validation is deferred to configure().
+Concatenation::Concatenation(std::vector<const Tensor *> inputs, Tensor *output,
+ const ConcatenationParams &params)
+ : KernelWithParams<ConcatenationParams>(std::move(inputs), {output}, params)
+{
+}
+
+// Validates that all inputs agree in type and in every dimension except the
+// concatenation axis, then resizes the output so that its axis dimension is
+// the sum of the inputs' axis dimensions.
+void Concatenation::configure()
+{
+ const int num_inputs = _inputs.size();
+ LUCI_INTERPRETER_CHECK(num_inputs > 0);
+ const Tensor *t0 = _inputs[0];
+
+ // TODO: Support concat with fused activation function
+ LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::NONE);
+
+ // Negative axis counts from the back, as in TFLite.
+ int axis = _params.axis;
+ if (axis < 0)
+ axis += t0->shape().num_dims();
+ LUCI_INTERPRETER_CHECK(axis >= 0 && axis < t0->shape().num_dims());
+
+ // Accumulate the axis extent while checking all other dims match t0.
+ int32_t sum_axis = t0->shape().dim(axis);
+ for (int i = 1; i < num_inputs; ++i)
+ {
+ const Tensor *tensor = _inputs[i];
+ LUCI_INTERPRETER_CHECK(tensor->element_type() == t0->element_type());
+ LUCI_INTERPRETER_CHECK(tensor->shape().num_dims() == t0->shape().num_dims());
+ for (int d = 0; d < t0->shape().num_dims(); ++d)
+ {
+ if (d == axis)
+ {
+ sum_axis += tensor->shape().dim(axis);
+ }
+ else
+ {
+ LUCI_INTERPRETER_CHECK(tensor->shape().dim(d) == t0->shape().dim(d));
+ }
+ }
+ }
+
+ Shape output_shape = t0->shape();
+ output_shape.dim(axis) = sum_axis;
+
+ // If input tensors are INT8 type then quantization parameters of all input tensors and the output
+ // should be the same
+ for (auto current_tensor : _inputs)
+ {
+ if (current_tensor->element_type() == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(current_tensor->quantized_dimension() ==
+ output()->quantized_dimension());
+
+ LUCI_INTERPRETER_CHECK(current_tensor->zero_points().size() ==
+ current_tensor->scales().size());
+ LUCI_INTERPRETER_CHECK(current_tensor->zero_points() == output()->zero_points());
+ LUCI_INTERPRETER_CHECK(current_tensor->scales() == output()->scales());
+ }
+ }
+ output()->resize(output_shape);
+}
+
+// Dispatches by element type. U8 goes through the quantized path (per-tensor
+// rescaling); all other supported types use the generic byte-copy path.
+void Concatenation::execute() const
+{
+ switch (_inputs[0]->element_type())
+ {
+ case DataType::FLOAT32:
+ evalGeneric<float>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S8:
+ evalGeneric<int8_t>();
+ break;
+ case DataType::S32:
+ evalGeneric<int32_t>();
+ break;
+ case DataType::S64:
+ evalGeneric<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+// Generic concatenation: gathers all input shapes/data pointers and delegates
+// to the TFLite reference implementation. Axis is re-normalized here because
+// configure()'s local normalization is not stored.
+template <typename T> void Concatenation::evalGeneric() const
+{
+ int axis = _params.axis;
+ if (axis < 0)
+ axis += output()->shape().num_dims();
+
+ VectorOfTensors<T, true> inputs(_inputs);
+ tflite::ConcatenationParams params{};
+ params.axis = axis;
+ params.inputs_count = _inputs.size();
+ tflite::reference_ops::Concatenation(params, inputs.shapes(), inputs.data(),
+ getTensorShape(output()), getTensorData<T>(output()));
+}
+
+// Quantized (U8) concatenation: also passes per-input zero points/scales plus
+// the output quantization so the reference op can rescale values whose input
+// quantization differs from the output's.
+void Concatenation::evalQuantized() const
+{
+ int axis = _params.axis;
+ if (axis < 0)
+ axis += output()->shape().num_dims();
+
+ VectorOfQuantizedTensors<true> inputs(_inputs);
+ tflite::ConcatenationParams params{};
+ params.axis = axis;
+ params.input_zeropoint = inputs.zero_point();
+ params.input_scale = inputs.scale();
+ params.inputs_count = _inputs.size();
+ params.output_zeropoint = output()->zero_point();
+ params.output_scale = output()->scale();
+
+ tflite::reference_ops::ConcatenationWithScaling(params, inputs.shapes(), inputs.data(),
+ getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.h b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.h
new file mode 100644
index 000000000..b48c8ed1e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_CONCATENATION_H
+#define LUCI_INTERPRETER_KERNELS_CONCATENATION_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel joining N input tensors along a given axis into a single output
+// tensor. Inputs must share element type and all non-axis dimensions.
+class Concatenation : public KernelWithParams<ConcatenationParams>
+{
+public:
+  Concatenation(std::vector<const Tensor *> inputs, Tensor *output,
+                const ConcatenationParams &params);
+
+  // Accessors for the kernel's I/O tensors.
+  const Tensor *input(int index) const { return _inputs[index]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // Typed concatenation without requantization.
+  template <typename T> void evalGeneric() const;
+  // U8 concatenation with per-input requantization to the output params.
+  void evalQuantized() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_CONCATENATION_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.test.cpp
new file mode 100644
index 000000000..f893b38fd
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.test.cpp
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Concatenation.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Test fixture: creates a fresh TestMemoryManager before each test case so
+// tensor allocations are isolated between tests.
+class ConcatenationTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Float concatenation of two 2x3 inputs along every valid axis; a negative
+// axis must produce the same result as its positive counterpart.
+TEST_F(ConcatenationTest, Float)
+{
+  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+  std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  ConcatenationParams params{};
+
+  // Try different 'axis' and expect different results.
+  {
+    params.axis = 0;
+    params.activation = luci::FusedActFunc::NONE;
+
+    Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+    kernel.configure();
+    for (auto t : kernel.getOutputTensors())
+    {
+      _memory_manager->allocate_memory(*t);
+    }
+    kernel.execute();
+
+    EXPECT_THAT(extractTensorData<float>(output_tensor),
+                FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}));
+  }
+  {
+    params.axis = -2; // Same as '0'.
+    params.activation = luci::FusedActFunc::NONE;
+
+    Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+    kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
+    kernel.execute();
+
+    EXPECT_THAT(extractTensorData<float>(output_tensor),
+                FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}));
+  }
+  {
+    params.axis = 1;
+    params.activation = luci::FusedActFunc::NONE;
+
+    Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+    kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
+    kernel.execute();
+
+    // Axis 1 interleaves the rows of the two inputs.
+    EXPECT_THAT(extractTensorData<float>(output_tensor),
+                FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}));
+  }
+  {
+    params.axis = -1; // Same as '1'.
+    params.activation = luci::FusedActFunc::NONE;
+
+    Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+    kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
+    kernel.execute();
+
+    EXPECT_THAT(extractTensorData<float>(output_tensor),
+                FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}));
+  }
+}
+
+// configure() must reject a kernel constructed with zero input tensors.
+TEST_F(ConcatenationTest, Input_Number_Check_NEG)
+{
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  ConcatenationParams params{};
+
+  params.axis = -1;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Axis -3 is out of range for rank-2 inputs and must be rejected.
+TEST_F(ConcatenationTest, Invalid_Axis_NEG)
+{
+  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+  std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  ConcatenationParams params{};
+
+  params.axis = -3;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Inputs with different element types (FLOAT32 vs U8) must be rejected.
+TEST_F(ConcatenationTest, Mismatching_Input_Type_NEG)
+{
+  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+  std::vector<uint8_t> input2_data{7, 8, 9, 10, 11, 12};
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::U8>({2, 3}, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  ConcatenationParams params{};
+
+  params.axis = -1;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Inputs with different ranks (2-D vs 3-D) must be rejected.
+TEST_F(ConcatenationTest, Mismatching_Input_Dimension_Num_NEG)
+{
+  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+  std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 2, 3}, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  ConcatenationParams params{};
+
+  params.axis = -1;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Inputs differing in a non-axis dimension (2x3 vs 3x3, axis = -1) must be
+// rejected.
+TEST_F(ConcatenationTest, Mismatching_Input_Dimension_NEG)
+{
+  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+  std::vector<float> input2_data{7, 8, 9, 10, 11, 12, 13, 14, 15};
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>({3, 3}, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  ConcatenationParams params{};
+
+  params.axis = -1;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Mixed U8/S8 inputs must be rejected even though both are 8-bit quantized.
+TEST_F(ConcatenationTest, Int8_Mismatching_Input_Type_NEG)
+{
+  std::vector<uint8_t> input1_data{1, 2, 3, 4};
+  std::vector<int8_t> input2_data{5, 6, 7, 8};
+  Tensor input1_tensor = makeInputTensor<DataType::U8>({2, 2}, input1_data, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 2}, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S8);
+  ConcatenationParams params{};
+
+  params.axis = -1;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// For S8 inputs, per-channel input quantization params that differ from the
+// (per-tensor) output quantization params must be rejected.
+TEST_F(ConcatenationTest, Int8_Mismatching_Input_Output_Quant_Params_NEG)
+{
+  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+  std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+  int quantized_dimension = 3;
+  std::vector<float> scales{0.1, 0.2, 0.3};
+  std::vector<int32_t> zero_points{1, -1, 1};
+
+  Tensor input1_tensor = makeInputTensor<DataType::S8>(
+    {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input1_data, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S8>(
+    {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S8, scales.at(0), zero_points.at(0));
+  ConcatenationParams params{};
+
+  params.axis = -1;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// An S8 input whose zero-point differs from the output's must be rejected.
+TEST_F(ConcatenationTest, Int8_Mismatching_Zero_Point_NEG)
+{
+  std::vector<float> input1_data{1, 2, 3, 4};
+  std::vector<float> input2_data{5, 6, 7, 8};
+  float scale = 0.1;
+  int32_t zero_point_1 = 1;
+  int32_t zero_point_2 = -1;
+
+  Tensor input1_tensor =
+    makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_1, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_2, input2_data, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(DataType::S8, scale, zero_point_1);
+  ConcatenationParams params{};
+
+  params.axis = -1;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Fused activations are not yet implemented for Concatenation, so configure()
+// must throw for any activation other than NONE.
+// TODO: Remove this test when concat w/ fused_activation is supported
+TEST_F(ConcatenationTest, With_Fused_Activation_NEG)
+{
+  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+  std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  ConcatenationParams params{};
+
+  params.axis = 1;
+  params.activation = luci::FusedActFunc::RELU;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.cpp
new file mode 100644
index 000000000..234f95425
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.cpp
@@ -0,0 +1,456 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Conv2D.h"
+
+#include "kernels/Utils.h"
+
+#include "PALConv2d.h"
+
+#include <stdexcept>
+#include <thread>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Conv2D kernel. 'scratchpad' is registered as a second output tensor; the
+// PAL backend may request it as workspace (see SetupScratchpadTensor in
+// configure()).
+Conv2D::Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
+               Tensor *scratchpad, const Conv2DParams &params)
+  : KernelWithParams<Conv2DParams>({input, filter, bias}, {output, scratchpad}, params)
+{
+}
+
+// Validates type combinations, shapes and fused activation, computes padding,
+// resizes the output, and asks the PAL layer to size the scratchpad tensor.
+void Conv2D::configure()
+{
+  // TensorFlow Lite (as of v2.2.0) supports the following combinations of types:
+  //     | input filter bias  output |
+  // ----+---------------------------+
+  // (1) | float float  float float  |
+  // (2) | float int8   float float  | hybrid
+  // (3) | uint8 uint8  int32 uint8  | quantized
+  // (4) | int8  int8   int32 int8   | quantized per channel
+  //
+  // We only support (1), (3) and (4) for now, and additionally the following:
+  //     | input filter bias  output |
+  // ----+---------------------------+
+  // (5) | int16 int16  int64 int16  |
+  //
+  if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
+  {
+    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
+  }
+  else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8)
+  {
+    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
+  }
+  else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8)
+  {
+    // Per-channel quantization: one scale per output channel and all filter
+    // zero-points must be zero (symmetric weights).
+    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
+    LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+    LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+                           static_cast<size_t>(filter()->shape().dim(0)));
+    for (auto zerop : filter()->zero_points())
+    {
+      LUCI_INTERPRETER_CHECK(zerop == 0);
+    }
+  }
+  else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16)
+  {
+    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64);
+  }
+  else
+  {
+    throw std::runtime_error("Unsupported type.");
+  }
+  LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type());
+
+  const Shape &input_shape = input()->shape();
+  const Shape &filter_shape = filter()->shape();
+  LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
+
+  // Input/output are NHWC; filter is OHWI (dim 0 = output channels).
+  const int32_t batches = input_shape.dim(0);
+  const int32_t input_height = input_shape.dim(1);
+  const int32_t input_width = input_shape.dim(2);
+  const int32_t output_depth = filter_shape.dim(0);
+  const int32_t filter_height = filter_shape.dim(1);
+  const int32_t filter_width = filter_shape.dim(2);
+  LUCI_INTERPRETER_CHECK(filter_shape.dim(3) == input_shape.dim(3));
+
+  LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 &&
+                                               bias()->shape().dim(0) == output_depth));
+
+  const int32_t output_height =
+    computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
+                      _params.dilation_height_factor);
+  const int32_t output_width =
+    computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
+                      _params.dilation_width_factor);
+
+  _padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
+                                   input_height, filter_height, output_height);
+  _padding_width = computePadding(_params.stride_width, _params.dilation_width_factor, input_width,
+                                  filter_width, output_width);
+
+  output()->resize({batches, output_height, output_width, output_depth});
+
+  // Allocate tensor for scratchpad, if needed.
+  tflite::ConvParams params{};
+  params.padding_values.height = _padding_height;
+  params.padding_values.width = _padding_width;
+  params.stride_height = _params.stride_height;
+  params.stride_width = _params.stride_width;
+  params.dilation_height_factor = _params.dilation_height_factor;
+  params.dilation_width_factor = _params.dilation_width_factor;
+  auto scratchpad = getOutputTensors()[1];
+  luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(), params,
+                                              getTensorShape(input()), getTensorShape(filter()),
+                                              getTensorShape(output()));
+
+  // Reject fused activations the eval paths cannot clamp for.
+  switch (_params.activation)
+  {
+    case Activation::NONE:
+    case Activation::RELU:
+    case Activation::RELU6:
+    case Activation::RELU_N1_TO_1:
+      break;
+    default:
+      throw std::runtime_error("Unsupported fused activation");
+  }
+}
+
+// Dispatches to the eval path matching the input element type; for U8 the
+// number of filter scales selects per-tensor vs per-channel quantization.
+void Conv2D::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      if (filter()->element_type() == DataType::FLOAT32)
+      {
+        evalFloat();
+        break;
+      }
+      throw std::runtime_error("Unsupported type.");
+    case DataType::U8:
+      // NOTE(review): if filter()->scales() is empty, neither branch runs and
+      // this case silently falls through without computing anything — confirm
+      // configure() makes an empty scales vector impossible for U8.
+      if (filter()->scales().size() == 1)
+      {
+        evalQuantized();
+      }
+      else if (filter()->scales().size() > 1)
+      {
+        LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+        LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+                               static_cast<size_t>(filter()->shape().dim(0)));
+        evalQuantizedPerChannel();
+      }
+      break;
+    case DataType::S8:
+      evalQuantizedS8PerChannel();
+      break;
+    case DataType::S16:
+      evalQuantizedS16();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Float convolution via the PAL backend; the fused activation is folded into
+// the [activation_min, activation_max] clamp range.
+void Conv2D::evalFloat() const
+{
+  float activation_min{};
+  float activation_max{};
+  calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+  tflite::ConvParams params{};
+  params.padding_values.height = _padding_height;
+  params.padding_values.width = _padding_width;
+  params.stride_height = _params.stride_height;
+  params.stride_width = _params.stride_width;
+  params.dilation_height_factor = _params.dilation_height_factor;
+  params.dilation_width_factor = _params.dilation_width_factor;
+  params.float_activation_min = activation_min;
+  params.float_activation_max = activation_max;
+
+  // Pass scratchpad data only when the backend actually allocated workspace.
+  auto scratchpad = getOutputTensors()[1];
+  float *scratchpad_data = nullptr;
+  if (scratchpad->is_allocatable())
+    scratchpad_data = scratchpad->data<float>();
+
+  luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<float>(input()),
+                             getTensorShape(filter()), getTensorData<float>(filter()),
+                             getTensorShape(bias()), getTensorData<float>(bias()),
+                             getTensorShape(output()), getTensorData<float>(output()),
+                             getTensorShape(scratchpad), scratchpad_data);
+}
+
+// Per-tensor quantized (U8) convolution: the combined rescale factor
+// input_scale * filter_scale / output_scale is converted to a fixed-point
+// multiplier + shift for the PAL kernel.
+void Conv2D::evalQuantized() const
+{
+  const auto input_scale = static_cast<double>(input()->scale());
+  const auto filter_scale = static_cast<double>(filter()->scale());
+  const auto output_scale = static_cast<double>(output()->scale());
+
+  const double real_multiplier = input_scale * filter_scale / output_scale;
+  int32_t output_multiplier{};
+  int output_shift{};
+  quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  tflite::ConvParams params{};
+  params.padding_values.height = _padding_height;
+  params.padding_values.width = _padding_width;
+  params.stride_height = _params.stride_height;
+  params.stride_width = _params.stride_width;
+  params.dilation_height_factor = _params.dilation_height_factor;
+  params.dilation_width_factor = _params.dilation_width_factor;
+  // The kernel expects input and filter zero points to be negated.
+  params.input_offset = -input()->zero_point();    // Note the '-'.
+  params.weights_offset = -filter()->zero_point(); // Note the '-'.
+  params.output_offset = output()->zero_point();
+  params.output_multiplier = output_multiplier;
+  params.output_shift = output_shift;
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  auto scratchpad = getOutputTensors()[1];
+  luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+                             getTensorShape(filter()), getTensorData<uint8_t>(filter()),
+                             getTensorShape(bias()), getTensorData<int32_t>(bias()),
+                             getTensorShape(output()), getTensorData<uint8_t>(output()),
+                             getTensorShape(scratchpad), getTensorData<uint8_t>(scratchpad));
+}
+
+// Per-channel quantized (U8) convolution, computed with a direct nested-loop
+// reference implementation: each output channel has its own requantization
+// multiplier/shift derived from the filter's per-channel scales.
+void Conv2D::evalQuantizedPerChannel() const
+{
+  const auto *input_data = getTensorData<uint8_t>(input());
+  const auto *filter_data = getTensorData<uint8_t>(filter());
+  const auto *bias_data = getTensorData<int32_t>(bias());
+  auto *output_data = getTensorData<uint8_t>(output());
+
+  const Shape &input_shape = input()->shape();
+  const Shape &filter_shape = filter()->shape();
+  const Shape &output_shape = output()->shape();
+
+  const int32_t batches = input_shape.dim(0);
+  const int32_t input_height = input_shape.dim(1);
+  const int32_t input_width = input_shape.dim(2);
+  const int32_t input_depth = input_shape.dim(3);
+  const int32_t output_depth = filter_shape.dim(0);
+  const int32_t filter_height = filter_shape.dim(1);
+  const int32_t filter_width = filter_shape.dim(2);
+  const int32_t output_height = output_shape.dim(1);
+  const int32_t output_width = output_shape.dim(2);
+
+  const int32_t stride_height = _params.stride_height;
+  const int32_t stride_width = _params.stride_width;
+  const int32_t dilation_height_factor = _params.dilation_height_factor;
+  const int32_t dilation_width_factor = _params.dilation_width_factor;
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  // One effective rescale factor per output channel, converted to
+  // fixed-point multiplier/shift pairs.
+  const std::vector<double> effective_output_scale =
+    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+  const std::vector<ChannelQuantMultipliers> multipliers_raw =
+    quantizeMultipliers(effective_output_scale);
+  BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(multipliers_raw);
+
+  for (int32_t batch = 0; batch < batches; ++batch)
+  {
+    for (int32_t out_y = 0; out_y < output_height; ++out_y)
+    {
+      for (int32_t out_x = 0; out_x < output_width; ++out_x)
+      {
+        for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+        {
+          const int32_t in_y_origin = out_y * stride_height - _padding_height;
+          const int32_t in_x_origin = out_x * stride_width - _padding_width;
+          int32_t acc = 0;
+          for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+          {
+            for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+            {
+              const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
+              const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
+              // Skip taps that fall into the (implicit zero) padding region.
+              if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
+              {
+                for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+                {
+                  const uint8_t input_val =
+                    input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+                  const uint8_t filter_val =
+                    filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+                  // Accumulate in zero-point-corrected int32.
+                  acc += static_cast<int32_t>(input_val - input()->zero_point()) *
+                         static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
+                }
+              }
+            }
+          }
+          if (bias_data)
+          {
+            acc += bias_data[out_c];
+          }
+
+          // Requantize with this channel's multiplier, add the output
+          // zero-point, then clamp to the fused-activation range.
+          int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+            acc, quant_multipliers[out_c].multiplier, quant_multipliers[out_c].shift);
+
+          scaled_acc += output()->zero_point();
+          scaled_acc = std::max(scaled_acc, activation_min);
+          scaled_acc = std::min(scaled_acc, activation_max);
+          output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+        }
+      }
+    }
+  }
+}
+
+// Per-channel quantized S8 convolution via the PAL backend: per-channel
+// multiplier/shift arrays are passed alongside the usual ConvParams.
+void Conv2D::evalQuantizedS8PerChannel() const
+{
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  tflite::ConvParams params{};
+  params.padding_values.height = _padding_height;
+  params.padding_values.width = _padding_width;
+  params.stride_height = _params.stride_height;
+  params.stride_width = _params.stride_width;
+  params.dilation_height_factor = _params.dilation_height_factor;
+  params.dilation_width_factor = _params.dilation_width_factor;
+  // The kernel expects filter zero points to be negated.
+  params.input_offset = -input()->zero_point(); // Note the '-'.
+  params.weights_offset = 0;                    // Unused in tflite code
+  params.output_offset = output()->zero_point();
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  const std::vector<double> effective_output_scales =
+    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+  std::vector<ChannelQuantMultipliers> quant_multipliers =
+    quantizeMultipliers(effective_output_scales);
+
+  // Split the (multiplier, shift) pairs into the two flat arrays the PAL
+  // ConvPerChannel entry point expects.
+  std::vector<int32_t> shifts;
+  std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts),
+                 [](ChannelQuantMultipliers cm) { return cm.shift; });
+  std::vector<int32_t> multipliers;
+  std::transform(quant_multipliers.begin(), quant_multipliers.end(),
+                 std::back_inserter(multipliers),
+                 [](ChannelQuantMultipliers cm) { return cm.multiplier; });
+
+  auto scratchpad = getOutputTensors()[1];
+  int8_t *scratchpad_data = nullptr;
+  if (scratchpad->is_allocatable())
+    scratchpad_data = scratchpad->data<int8_t>();
+
+  luci_interpreter_pal::ConvPerChannel(
+    params, multipliers.data(), shifts.data(), getTensorShape(input()),
+    getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
+    getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
+    getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
+}
+
+// S16 quantized convolution (symmetric: zero-points are not subtracted),
+// computed with a direct nested-loop reference implementation using an int64
+// accumulator and per-channel requantization multipliers.
+void Conv2D::evalQuantizedS16() const
+{
+  const auto *input_data = getTensorData<int16_t>(input());
+  const auto *filter_data = getTensorData<int16_t>(filter());
+  const auto *bias_data = getTensorData<int64_t>(bias());
+  auto *output_data = getTensorData<int16_t>(output());
+
+  const Shape &input_shape = input()->shape();
+  const Shape &filter_shape = filter()->shape();
+  const Shape &output_shape = output()->shape();
+
+  const int32_t batches = input_shape.dim(0);
+  const int32_t input_height = input_shape.dim(1);
+  const int32_t input_width = input_shape.dim(2);
+  const int32_t input_depth = input_shape.dim(3);
+  const int32_t output_depth = filter_shape.dim(0);
+  const int32_t filter_height = filter_shape.dim(1);
+  const int32_t filter_width = filter_shape.dim(2);
+  const int32_t output_height = output_shape.dim(1);
+  const int32_t output_width = output_shape.dim(2);
+
+  const int32_t stride_height = _params.stride_height;
+  const int32_t stride_width = _params.stride_width;
+  const int32_t dilation_height_factor = _params.dilation_height_factor;
+  const int32_t dilation_width_factor = _params.dilation_width_factor;
+
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  // One effective rescale factor per output channel (broadcast if per-tensor).
+  const std::vector<double> effective_output_scale =
+    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+  const std::vector<ChannelQuantMultipliers> multipliers_raw =
+    quantizeMultipliers(effective_output_scale);
+  BroadcastableWrapper<ChannelQuantMultipliers> multipliers(multipliers_raw);
+
+  for (int32_t batch = 0; batch < batches; ++batch)
+  {
+    for (int32_t out_y = 0; out_y < output_height; ++out_y)
+    {
+      for (int32_t out_x = 0; out_x < output_width; ++out_x)
+      {
+        for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+        {
+          const int32_t in_y_origin = out_y * stride_height - _padding_height;
+          const int32_t in_x_origin = out_x * stride_width - _padding_width;
+          // int64 accumulator avoids overflow of int16*int16 sums.
+          int64_t acc = 0;
+          for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+          {
+            for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+            {
+              const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
+              const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
+              // Skip taps that fall into the (implicit zero) padding region.
+              if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
+              {
+                for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+                {
+                  const int16_t input_val =
+                    input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+                  const int16_t filter_val =
+                    filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+                  acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
+                }
+              }
+            }
+          }
+          if (bias_data)
+          {
+            acc += bias_data[out_c];
+          }
+
+          // Requantize with this channel's multiplier, then clamp to the
+          // fused-activation range (no output zero-point for S16).
+          int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+            acc, multipliers[out_c].multiplier, multipliers[out_c].shift);
+
+          scaled_acc = std::max(scaled_acc, activation_min);
+          scaled_acc = std::min(scaled_acc, activation_max);
+
+          output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+        }
+      }
+    }
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.h
new file mode 100644
index 000000000..330bf3a2a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_CONV2D_H
+#define LUCI_INTERPRETER_KERNELS_CONV2D_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <memory>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// 2-D convolution kernel (NHWC input/output, OHWI filter) supporting float,
+// U8/S8 quantized (per-tensor and per-channel) and S16 quantized paths.
+class Conv2D : public KernelWithParams<Conv2DParams>
+{
+public:
+  Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
+         Tensor *scratchpad, const Conv2DParams &params);
+
+  // Accessors for the kernel's I/O tensors; bias may be nullptr.
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *filter() const { return _inputs[1]; }
+  const Tensor *bias() const { return _inputs[2]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // One eval path per supported type combination; see execute() dispatch.
+  void evalFloat() const;
+  void evalQuantized() const;
+  void evalQuantizedPerChannel() const;
+  void evalQuantizedS8PerChannel() const;
+  void evalQuantizedS16() const;
+
+private:
+  // Explicit padding (per side) computed in configure() from the padding
+  // scheme, strides and dilation.
+  int32_t _padding_height{};
+  int32_t _padding_width{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_CONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.test.cpp
new file mode 100644
index 000000000..0fe6ef795
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.test.cpp
@@ -0,0 +1,707 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Conv2D.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Common fixture: creates a fresh TestMemoryManager for every test case so
+// tensor allocations do not leak state between tests.
+class Conv2DTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(Conv2DTest, Float)
+{
+  // Float path: VALID padding, stride (2, 1), dilation 1, fused RELU.
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  const Shape in_shape{1, 4, 3, 2};
+  const Shape kernel_shape{2, 2, 2, 2};
+  const Shape bias_shape{2};
+  const std::vector<float> in_data{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  const std::vector<float> kernel_data{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  const std::vector<float> bias_values{1, 2};
+
+  Tensor in_tensor = makeInputTensor<DataType::FLOAT32>(in_shape, in_data, _memory_manager.get());
+  Tensor kernel_tensor =
+    makeInputTensor<DataType::FLOAT32>(kernel_shape, kernel_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_values, _memory_manager.get());
+  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor out_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2D kernel(&in_tensor, &kernel_tensor, &bias_tensor, &out_tensor, &scratchpad, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(scratchpad);
+  _memory_manager->allocate_memory(out_tensor);
+  kernel.execute();
+
+  const std::vector<float> expected_data{
+    11, 16, 7, 20, // row = 0
+    0,  40, 0, 44, // row = 1
+  };
+  const std::vector<int32_t> expected_shape{1, 2, 2, 2};
+  EXPECT_THAT(extractTensorData<float>(out_tensor), FloatArrayNear(expected_data));
+  EXPECT_THAT(extractTensorShape(out_tensor), ::testing::ElementsAreArray(expected_shape));
+}
+
+TEST_F(Conv2DTest, FloatPointwise)
+{
+  // 1x1 ("pointwise") convolution with unit strides and fused RELU.
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 1;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  const Shape in_shape{1, 2, 2, 2};
+  const Shape kernel_shape{2, 1, 1, 2};
+  const Shape bias_shape{2};
+  const std::vector<float> in_data{
+    1, 2, // row = 0, col = 0
+    3, 4, // row = 0, col = 1
+    5, 6, // row = 1, col = 0
+    7, 8, // row = 1, col = 1
+  };
+  const std::vector<float> kernel_data{
+    -1, 2, // out = 0
+    -3, 4, // out = 1
+  };
+  const std::vector<float> bias_values{1, 2};
+
+  Tensor in_tensor = makeInputTensor<DataType::FLOAT32>(in_shape, in_data, _memory_manager.get());
+  Tensor kernel_tensor =
+    makeInputTensor<DataType::FLOAT32>(kernel_shape, kernel_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_values, _memory_manager.get());
+  Tensor out_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+  Conv2D kernel(&in_tensor, &kernel_tensor, &bias_tensor, &out_tensor, &scratchpad, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(scratchpad);
+  _memory_manager->allocate_memory(out_tensor);
+  kernel.execute();
+
+  const std::vector<float> expected_data{
+    4, 7,  6,  9,  // row = 0
+    8, 11, 10, 13, // row = 1
+  };
+  const std::vector<int32_t> expected_shape{1, 2, 2, 2};
+  EXPECT_THAT(extractTensorData<float>(out_tensor), FloatArrayNear(expected_data));
+  EXPECT_THAT(extractTensorShape(out_tensor), ::testing::ElementsAreArray(expected_shape));
+}
+
+TEST_F(Conv2DTest, FloatCheck)
+{
+  // Two batches, three 2x2 filters, stride 2, no fused activation.
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 2;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::NONE;
+
+  const Shape in_shape{2, 2, 4, 1};
+  const Shape kernel_shape{3, 2, 2, 1};
+  const Shape bias_shape{3};
+  const std::vector<float> in_data{
+    // First batch
+    1, 1, 1, 1, // row = 1
+    2, 2, 2, 2, // row = 2
+    // Second batch
+    1, 2, 3, 4, // row = 1
+    1, 2, 3, 4, // row = 2
+  };
+  const std::vector<float> kernel_data{
+    1,  2,  3,  4, // first 2x2 filter
+    -1, 1,  -1, 1, // second 2x2 filter
+    -1, -1, 1,  1, // third 2x2 filter
+  };
+  const std::vector<float> bias_values{1, 2, 3};
+
+  Tensor in_tensor = makeInputTensor<DataType::FLOAT32>(in_shape, in_data, _memory_manager.get());
+  Tensor kernel_tensor =
+    makeInputTensor<DataType::FLOAT32>(kernel_shape, kernel_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_values, _memory_manager.get());
+  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor out_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2D kernel(&in_tensor, &kernel_tensor, &bias_tensor, &out_tensor, &scratchpad, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(out_tensor);
+  _memory_manager->allocate_memory(scratchpad);
+  kernel.execute();
+
+  const std::vector<float> expected_data{
+    18, 2, 5, // first batch, left
+    18, 2, 5, // first batch, right
+    17, 4, 3, // second batch, left
+    37, 4, 3, // second batch, right
+  };
+  const std::vector<int32_t> expected_shape{2, 1, 2, 3};
+  EXPECT_THAT(extractTensorData<float>(out_tensor), FloatArrayNear(expected_data));
+  EXPECT_THAT(extractTensorShape(out_tensor), ::testing::ElementsAreArray(expected_shape));
+}
+
+// Per-tensor asymmetric U8 quantization; same data and expected values as
+// FloatCheck — the dequantized output must match the float reference.
+TEST_F(Conv2DTest, Uint8)
+{
+  std::vector<float> input_data{
+    // First batch
+    1, 1, 1, 1, // row = 1
+    2, 2, 2, 2, // row = 2
+    // Second batch
+    1, 2, 3, 4, // row = 1
+    1, 2, 3, 4, // row = 2
+  };
+  std::vector<float> filter_data{
+    1,  2,  3,  4, // first 2x2 filter
+    -1, 1,  -1, 1, // second 2x2 filter
+    -1, -1, 1,  1, // third 2x2 filter
+  };
+  std::vector<float> bias_data{1, 2, 3};
+
+  std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
+  std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
+
+  // The filter reuses the input's quantization parameters, hence the S32 bias
+  // scale below is input_scale * input_scale.
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
+                                  input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::U8>({3, 2, 2, 1}, input_quant_param.first, input_quant_param.second,
+                                  filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S32>(
+    {3}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::U8, Shape({}), {}, "");
+  Tensor output_tensor =
+    makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 2;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::NONE;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(im2col);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{
+    18, 2, 5, // first batch, left
+    18, 2, 5, // first batch, right
+    17, 4, 3, // second batch, left
+    37, 4, 3, // second batch, right
+  };
+  std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Channel-wise (per-output-channel) U8 quantization of the filter: channel c
+// uses filter_quant_params[c], and the matching per-channel bias scale is
+// filter_scale[c] * input_scale. Expected values equal the float reference.
+TEST_F(Conv2DTest, Uint8_CWQ)
+{
+  const int output_channels = 3;
+  std::vector<float> input_data{
+    // First batch
+    1, 1, 1, 1, // row = 1
+    2, 2, 2, 2, // row = 2
+    // Second batch
+    1, 2, 3, 4, // row = 1
+    1, 2, 3, 4, // row = 2
+  };
+  std::vector<float> filter_data{
+    1,  2,  3,  4, // first 2x2 filter
+    -1, 1,  -1, 1, // second 2x2 filter
+    -1, -1, 1,  1, // third 2x2 filter
+  };
+  std::vector<float> bias_data{1, 2, 3};
+  Shape filter_shape{output_channels, 2, 2, 1};
+
+  std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(0, 4);
+  std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
+
+  // One (scale, zero_point) pair per output channel.
+  std::vector<std::pair<float, int32_t>> filter_quant_params;
+  filter_quant_params.push_back(quantizationParams<uint8_t>(0, 4));
+  filter_quant_params.push_back(quantizationParams<uint8_t>(-1, 1));
+  filter_quant_params.push_back(quantizationParams<uint8_t>(-1, 1));
+
+  std::vector<float> filter_scales;
+  std::vector<int32_t> filter_zerops;
+  for (auto iter : filter_quant_params)
+  {
+    filter_scales.push_back(iter.first);
+    filter_zerops.push_back(iter.second);
+  }
+
+  // Bias is quantized symmetrically (zero point 0) with per-channel scales.
+  std::vector<float> bias_scales;
+  for (int i = 0; i < output_channels; ++i)
+    bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+  std::vector<int32_t> zerop(output_channels, 0);
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
+                                  input_data, _memory_manager.get());
+  Tensor filter_tensor = makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops,
+                                                       0, filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
+                                                      bias_data, _memory_manager.get());
+  Tensor im2col(DataType::U8, Shape({}), {}, "");
+  Tensor output_tensor =
+    makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 2;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::NONE;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(im2col);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{
+    18, 2, 5, // first batch, left
+    18, 2, 5, // first batch, right
+    17, 4, 3, // second batch, left
+    37, 4, 3, // second batch, right
+  };
+  std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Channel-wise S8 quantization with explicit power-of-two per-channel filter
+// scales (0.5, 0.25, 0.125) and zero points of 0; per-channel bias scale is
+// filter_scale[c] * input_scale.
+TEST_F(Conv2DTest, SInt8_CWQ)
+{
+  const int output_channels = 3;
+  std::vector<float> input_data{
+    // First batch
+    1, 1, 1, 1, // row = 1
+    2, 2, 2, 2, // row = 2
+    // Second batch
+    1, 2, 3, 4, // row = 1
+    1, 2, 3, 4, // row = 2
+  };
+  std::vector<float> filter_data{
+    1,  2,  3,  4, // first 2x2 filter
+    -1, 1,  -1, 1, // second 2x2 filter
+    -1, -1, 1,  1, // third 2x2 filter
+  };
+  std::vector<float> bias_data{1, 2, 3};
+  Shape filter_shape{output_channels, 2, 2, 1};
+
+  std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(0, 4);
+  std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
+
+  // Explicit symmetric per-channel filter parameters (scale, zero_point = 0).
+  std::vector<std::pair<float, int32_t>> filter_quant_params;
+  filter_quant_params.push_back(std::pair<float, int32_t>(0.5, 0));
+  filter_quant_params.push_back(std::pair<float, int32_t>(0.25, 0));
+  filter_quant_params.push_back(std::pair<float, int32_t>(0.125, 0));
+
+  std::vector<float> filter_scales;
+  std::vector<int32_t> filter_zerops;
+  for (auto iter : filter_quant_params)
+  {
+    filter_scales.push_back(iter.first);
+    filter_zerops.push_back(iter.second);
+  }
+
+  // Bias uses per-channel scales derived from filter and input scales.
+  std::vector<float> bias_scales;
+  for (int i = 0; i < output_channels; ++i)
+    bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+  std::vector<int32_t> zerop(output_channels, 0);
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
+                                  input_data, _memory_manager.get());
+  Tensor filter_tensor = makeInputTensor<DataType::S8>(filter_shape, filter_scales, filter_zerops,
+                                                       0, filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
+                                                      bias_data, _memory_manager.get());
+  Tensor im2col(DataType::S8, Shape({}), {}, "");
+  Tensor output_tensor =
+    makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 2;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::NONE;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(im2col);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{
+    18, 2, 5, // first batch, left
+    18, 2, 5, // first batch, right
+    17, 4, 3, // second batch, left
+    37, 4, 3, // second batch, right
+  };
+  std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Per-tensor symmetric 16-bit quantization (all zero points 0) with an S64
+// bias whose scale is input_scale * filter_scale (0.25 * 0.2). Same data and
+// expected values as the Float test.
+TEST_F(Conv2DTest, SInt16)
+{
+  Shape input_shape{1, 4, 3, 2};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{2};
+  std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
+
+  std::vector<float> input_data{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> bias_data{1, 2};
+  std::vector<float> ref_output_data{
+    11, 16, 7, 20, // row = 0
+    0,  40, 0, 44, // row = 1
+  };
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::S16, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(im2col);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+// Symmetric 16-bit activations with channel-wise quantized weights: output
+// channel c uses filter_scales[c], and the per-channel S64 bias scale is
+// filter_scales[c] * input_scale.
+TEST_F(Conv2DTest, SInt16_CWQ_weights)
+{
+  Shape input_shape{1, 2, 2, 2};  // Batch x H x W x C
+  Shape filter_shape{3, 1, 1, 2}; // Out channels x H x W x In Channels
+  Shape bias_shape{3};
+  std::vector<int32_t> ref_output_shape{1, 2, 2, 3};
+
+  std::vector<float> input_data{
+    1, 2, // row = 0, col 0
+    3, 4, // row = 0, col 1
+    5, 6, // row = 1, col 0
+    7, 8, // row = 1, col 1
+  };
+  std::vector<float> filter_data{
+    4, -3, // out = 0
+    1, -3, // out = 1
+    5, -3, // out = 2
+  };
+  std::vector<float> bias_data{1, 10, 5};
+  std::vector<float> ref_output_data{
+    0, 5, 4,  // row 0, col 0
+    1, 1, 8,  // row 0, col 1
+    3, 0, 12, // row 1, col 0
+    5, 0, 16, // row 1, col 1
+  };
+
+  float input_scale = 0.25f;
+  float output_scale = 0.05f;
+  std::vector<float> filter_scales = {0.25f, 0.2f, 0.1f};
+  std::vector<float> bias_scales;
+  // Index with size_t to match std::vector::size() and avoid a
+  // signed/unsigned comparison warning.
+  for (size_t i = 0; i < filter_scales.size(); ++i)
+    bias_scales.push_back(filter_scales[i] * input_scale);
+  std::vector<int32_t> zerop = {0, 0, 0};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, _memory_manager.get());
+  Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0,
+                                                        filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
+                                                      _memory_manager.get());
+  Tensor im2col(DataType::S16, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 1;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(im2col);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(Conv2DTest, Unsupported_Type_Configure_NEG)
+{
+  // S32 input combined with a FLOAT32 filter is not a supported type
+  // combination, so configure() must throw.
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  const Shape in_shape{1, 4, 3, 2};
+  const Shape kernel_shape{2, 2, 2, 2};
+  const Shape bias_shape{2};
+  const std::vector<int32_t> in_data{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  const std::vector<float> kernel_data{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  const std::vector<float> bias_values{1, 2};
+
+  Tensor in_tensor = makeInputTensor<DataType::S32>(in_shape, in_data, _memory_manager.get());
+  Tensor kernel_tensor =
+    makeInputTensor<DataType::FLOAT32>(kernel_shape, kernel_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_values, _memory_manager.get());
+  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor out_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2D kernel(&in_tensor, &kernel_tensor, &bias_tensor, &out_tensor, &scratchpad, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(Conv2DTest, Invalid_Bias_Type_NEG)
+{
+  // U8 bias with FLOAT32 input/filter is invalid; configure() must throw.
+  Shape input_shape{1, 4, 3, 2};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{2};
+  std::vector<float> input_data{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<uint8_t> bias_data{1, 2};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::U8>(bias_shape, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(Conv2DTest, Invalid_Bias_Data_NEG)
+{
+  // Bias has 3 elements but the filter has only 2 output channels;
+  // configure() must throw.
+  Shape input_shape{1, 4, 3, 2};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{3};
+  std::vector<float> input_data{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> bias_data{1, 2, 3};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(Conv2DTest, Invalid_Input_Shape_NEG)
+{
+  // Input depth (1) does not match the filter's input-channel dimension (2);
+  // configure() must throw.
+  Shape input_shape{1, 4, 6, 1};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{2};
+  std::vector<float> input_data{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> bias_data{1, 2};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(Conv2DTest, Invalid_fused_act_tanh_NEG)
+{
+  // TANH is not a supported fused activation for this kernel;
+  // configure() must throw.
+  Shape input_shape{1, 4, 3, 2};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{2};
+  std::vector<float> input_data{
+    1,  2,  3,  4,  5,  6,  // row = 0
+    7,  8,  9,  10, 11, 12, // row = 1
+    13, 14, 15, 16, 17, 18, // row = 2
+    19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+    1,  2,  -3, -4, // out = 0, row = 0
+    -5, 6,  -7, 8,  // out = 1, row = 0
+    4,  -2, 3,  -1, // out = 0, row = 1
+    -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> bias_data{1, 2};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::TANH;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.cpp
new file mode 100644
index 000000000..3a9acd1d4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthToSpace.h"
+#include "Utils.h"
+#include "PALDepthToSpace.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers the single input and output tensor with the kernel base class.
+DepthToSpace::DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams &params)
+  : KernelWithParams<DepthToSpaceParams>({input}, {output}, params)
+{
+}
+
+// Validates the operands and computes the output shape:
+//   H_out = H_in * block_size, W_out = W_in * block_size,
+//   C_out = C_in / block_size^2.
+void DepthToSpace::configure()
+{
+  // Input must be rank-4 (NHWC); only FLOAT32 and U8 are supported, and
+  // input/output element types must match. NOTE: the original code was
+  // missing the terminating ';' on two of these checks.
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 ||
+                         output()->element_type() == DataType::U8);
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  const int block_size = params().block_size;
+  // Guard against division by zero / negative block sizes below.
+  LUCI_INTERPRETER_CHECK(block_size > 0);
+  const int32_t input_height = input()->shape().dim(1);
+  const int32_t input_width = input()->shape().dim(2);
+  const int32_t input_channels = input()->shape().dim(3);
+  int32_t output_height = input_height * block_size;
+  int32_t output_width = input_width * block_size;
+  int32_t output_channels = input_channels / block_size / block_size;
+
+  // The channel check rejects depths that are not divisible by block_size^2
+  // (the integer division above would have truncated in that case).
+  LUCI_INTERPRETER_CHECK(input_height == output_height / block_size);
+  LUCI_INTERPRETER_CHECK(input_width == output_width / block_size);
+  LUCI_INTERPRETER_CHECK(input_channels == output_channels * block_size * block_size);
+
+  Shape output_shape(4);
+  output_shape.dim(0) = input()->shape().dim(0);
+  output_shape.dim(1) = output_height;
+  output_shape.dim(2) = output_width;
+  output_shape.dim(3) = output_channels;
+
+  output()->resize(output_shape);
+}
+
+void DepthToSpace::execute() const
+{
+  // Dispatch to the platform abstraction layer (PAL) implementation based on
+  // the element type that configure() already validated.
+  tflite::DepthToSpaceParams op_params;
+  op_params.block_size = params().block_size;
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()),
+                                         getTensorData<float>(input()), getTensorShape(output()),
+                                         getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()),
+                                         getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                         getTensorData<uint8_t>(output()));
+      break;
+    default:
+      // Defensive guard; configure() restricts types to the cases above.
+      throw std::runtime_error("Unsupported Type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.h b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.h
new file mode 100644
index 000000000..63ce37610
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Interpreter kernel for the DepthToSpace operation: moves data from the
+// channel dimension into spatial blocks of size params().block_size.
+class DepthToSpace : public KernelWithParams<DepthToSpaceParams>
+{
+public:
+  DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams &params);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp
new file mode 100644
index 000000000..88e6e07f1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/DepthToSpace.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Typed test suite covering both element types DepthToSpace supports.
+template <typename T> class DepthToSpaceTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(DepthToSpaceTest, DataTypes);
+
+TYPED_TEST(DepthToSpaceTest, SimpleCase)
+{
+  // block_size 2 rearranges a 1x1x2x4 input into a 1x2x4x1 output.
+  std::unique_ptr<IMemoryManager> mm = std::make_unique<TestMemoryManager>();
+
+  const Shape in_shape{1, 1, 2, 4};
+  const std::vector<TypeParam> in_data{1, 2, 3, 4, 5, 6, 7, 8};
+  const std::vector<TypeParam> expected_data{1, 2, 5, 6, 3, 4, 7, 8};
+  const std::vector<int32_t> expected_shape{1, 2, 4, 1};
+
+  Tensor in_tensor =
+    makeInputTensor<getElementType<TypeParam>()>(in_shape, in_data, mm.get());
+  Tensor out_tensor = makeOutputTensor(getElementType<TypeParam>());
+
+  DepthToSpaceParams params{};
+  params.block_size = 2;
+
+  DepthToSpace kernel(&in_tensor, &out_tensor, params);
+  kernel.configure();
+  mm->allocate_memory(out_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<TypeParam>(out_tensor),
+              ::testing::ElementsAreArray(expected_data));
+  EXPECT_THAT(extractTensorShape(out_tensor), ::testing::ElementsAreArray(expected_shape));
+}
+
+TEST(DepthToSpaceTest, InvalidInputShape_NEG)
+{
+  // A rank-3 input must be rejected by configure() (rank 4 is required).
+  std::unique_ptr<IMemoryManager> mm = std::make_unique<TestMemoryManager>();
+
+  const std::vector<float> in_data{1, 2, 3, 4, 5, 6, 7, 8};
+  const Shape in_shape{1, 2, 4};
+
+  Tensor in_tensor = makeInputTensor<DataType::FLOAT32>(in_shape, in_data, mm.get());
+  Tensor out_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  DepthToSpaceParams params{};
+  params.block_size = 2;
+
+  DepthToSpace kernel(&in_tensor, &out_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(DepthToSpaceTest, InOutTypeMismatch_NEG)
+{
+  // FLOAT32 input with a U8 output must be rejected by configure().
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8};
+  Shape input_shape{1, 1, 2, 4};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+  DepthToSpaceParams params{};
+  params.block_size = 2;
+
+  DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(DepthToSpaceTest, InvalidBlockSize_NEG)
+{
+  // 4 input channels are not divisible by block_size^2 (= 9), so
+  // configure() must throw.
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8};
+  Shape input_shape{1, 1, 2, 4};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  DepthToSpaceParams params{};
+  params.block_size = 3;
+
+  DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
new file mode 100644
index 000000000..c554c309d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
@@ -0,0 +1,451 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/DepthwiseConv2D.h"
+
+#include "kernels/Utils.h"
+
+#include "PALDepthwiseConv2d.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Registers the kernel tensors with the base class.
// Inputs: {input, filter, bias (may be nullptr)}; outputs: {output, scratchpad}.
DepthwiseConv2D::DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias,
                                 Tensor *output, Tensor *scratchpad,
                                 const DepthwiseConv2DParams &params)
  : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output, scratchpad}, params)
{
}
+
// Validates the dtype combination and tensor shapes, computes explicit
// paddings, resizes the output tensor, and lets the PAL size the scratchpad.
void DepthwiseConv2D::configure()
{
  // TensorFlow Lite (as of v2.2.0) supports the following combinations of types:
  //     | input filter bias  output |
  // ----+---------------------------+
  // (1) | float float  float float  |
  // (2) | float int8   float float  | hybrid
  // (3) | uint8 uint8  int32 uint8  | quantized
  // (4) | int8  int8   int32 int8   | quantized per channel
  // (5) | int16 int8   int64 int16  | quantized per channel 16x8
  //
  // We only support (1), (3) and (4) for now, and additionally the following:
  //     | input filter bias  output |
  // ----+---------------------------+
  // (5) | int16 int16  int64 int16  |
  //
  if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
  {
    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
  }
  else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8)
  {
    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
  }
  else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8)
  {
    // S8 requires per-channel symmetric quantization: one scale per output
    // channel (last filter dimension) and every zero point equal to zero.
    LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
    LUCI_INTERPRETER_CHECK(static_cast<uint32_t>(filter()->shape().dim(3)) ==
                           filter()->scales().size());
    for (auto zerop : filter()->zero_points())
    {
      LUCI_INTERPRETER_CHECK(zerop == 0);
    }
    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
  }
  else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16)
  {
    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64);
  }
  else
  {
    throw std::runtime_error("Unsupported type.");
  }
  LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type());

  const Shape &input_shape = input()->shape();
  const Shape &filter_shape = filter()->shape();
  LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);

  const int32_t batches = input_shape.dim(0);
  const int32_t input_height = input_shape.dim(1);
  const int32_t input_width = input_shape.dim(2);
  // Filter format: [1, H, W, O].
  LUCI_INTERPRETER_CHECK(filter_shape.dim(0) == 1);
  const int32_t filter_height = filter_shape.dim(1);
  const int32_t filter_width = filter_shape.dim(2);
  const int32_t channels_out = filter_shape.dim(3);

  // Bias, when present, must be a vector with one element per output channel.
  LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 &&
                                               bias()->shape().dim(0) == channels_out));

  const int32_t output_height =
    computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
                      _params.dilation_height_factor);
  const int32_t output_width =
    computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
                      _params.dilation_width_factor);

  // Convert the padding mode into explicit per-axis pixel amounts; the eval
  // paths consume these directly.
  _padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
                                   input_height, filter_height, output_height);
  _padding_width = computePadding(_params.stride_width, _params.dilation_width_factor, input_width,
                                  filter_width, output_width);

  output()->resize({batches, output_height, output_width, channels_out});

  // Only the dilation factors are filled in here; the PAL uses them together
  // with the shapes to decide whether a scratchpad is needed and how large.
  tflite::DepthwiseParams params{};

  params.dilation_height_factor = _params.dilation_height_factor;
  params.dilation_width_factor = _params.dilation_width_factor;

  auto scratchpad = getOutputTensors()[1];
  luci_interpreter_pal::SetupScratchpadTensor(scratchpad, params, input()->element_type(),
                                              getTensorShape(input()), getTensorShape(filter()),
                                              getTensorShape(output()));
}
+
+void DepthwiseConv2D::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ if (filter()->element_type() == DataType::FLOAT32)
+ {
+ evalFloat();
+ break;
+ }
+ throw std::runtime_error("Unsupported type.");
+ case DataType::U8:
+ if (filter()->scales().size() == 1)
+ {
+ evalQuantized();
+ }
+ else if (filter()->scales().size() > 1)
+ {
+ LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+ static_cast<size_t>(filter()->shape().dim(3)));
+ evalQuantizedPerChannel();
+ }
+ break;
+ case DataType::S8:
+ evalQuantizedS8PerChannel();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void DepthwiseConv2D::evalFloat() const
+{
+ float activation_min{};
+ float activation_max{};
+ calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+ tflite::DepthwiseParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+ params.depth_multiplier = _params.depth_multiplier;
+ params.float_activation_min = activation_min;
+ params.float_activation_max = activation_max;
+
+ tflite::reference_ops::DepthwiseConv(
+ params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
+ getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
// Hand-written U8 reference path used when the filter carries one scale per
// output channel (the stock TFLite U8 reference kernel handles only a single
// layer-wise scale).
void DepthwiseConv2D::evalQuantizedPerChannel() const
{
  const auto *input_data = getTensorData<uint8_t>(input());
  const auto *filter_data = getTensorData<uint8_t>(filter());
  const auto *bias_data = getTensorData<int32_t>(bias());
  auto *output_data = getTensorData<uint8_t>(output());

  const Shape &input_shape = input()->shape();
  const Shape &filter_shape = filter()->shape();
  const Shape &output_shape = output()->shape();

  const int32_t batches = input_shape.dim(0);
  const int32_t input_height = input_shape.dim(1);
  const int32_t input_width = input_shape.dim(2);
  const int32_t input_depth = input_shape.dim(3);
  const int32_t filter_height = filter_shape.dim(1);
  const int32_t filter_width = filter_shape.dim(2);
  const int32_t output_height = output_shape.dim(1);
  const int32_t output_width = output_shape.dim(2);

  const int32_t stride_height = _params.stride_height;
  const int32_t stride_width = _params.stride_width;
  const int32_t dilation_height_factor = _params.dilation_height_factor;
  const int32_t dilation_width_factor = _params.dilation_width_factor;
  const int32_t depth_multiplier = _params.depth_multiplier;

  int32_t activation_min{};
  int32_t activation_max{};
  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);

  // One (multiplier, shift) pair per output channel; BroadcastableWrapper lets
  // a single layer-wise pair serve every channel index.
  const std::vector<double> effective_output_scales =
    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());

  std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
    quantizeMultipliers(effective_output_scales);
  BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);

  for (int batch = 0; batch < batches; ++batch)
  {
    for (int out_y = 0; out_y < output_height; ++out_y)
    {
      for (int out_x = 0; out_x < output_width; ++out_x)
      {
        for (int in_channel = 0; in_channel < input_depth; ++in_channel)
        {
          for (int m = 0; m < depth_multiplier; ++m)
          {
            // Each input channel feeds depth_multiplier output channels.
            const int output_channel = m + in_channel * depth_multiplier;
            const int in_x_origin = (out_x * stride_width) - _padding_width;
            const int in_y_origin = (out_y * stride_height) - _padding_height;
            int32 acc = 0;
            for (int filter_y = 0; filter_y < filter_height; ++filter_y)
            {
              for (int filter_x = 0; filter_x < filter_width; ++filter_x)
              {
                const int in_x = in_x_origin + dilation_width_factor * filter_x;
                const int in_y = in_y_origin + dilation_height_factor * filter_y;
                // Zero padding by omitting the areas outside the image.
                const bool is_point_inside_image =
                  (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
                if (is_point_inside_image)
                {
                  int32 input_val =
                    input_data[calcOffset(input_shape, batch, in_y, in_x, in_channel)];
                  int32 filter_val =
                    filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, output_channel)];
                  // Accumulate with zero points removed (per-channel for the
                  // filter, single zero point for the input).
                  acc += (filter_val - filter()->zero_points()[output_channel]) *
                         (input_val - input()->zero_point());
                }
              }
            }
            if (bias_data)
            {
              acc += bias_data[output_channel];
            }
            // Requantize to the output scale and re-apply the output zero
            // point, then clamp to the fused-activation range.
            int32_t output_multiplier = quant_multipliers[output_channel].multiplier;
            int output_shift = quant_multipliers[output_channel].shift;
            int32_t scaled_acc =
              tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
            scaled_acc += output()->zero_point();
            scaled_acc = std::max(scaled_acc, activation_min);
            scaled_acc = std::min(scaled_acc, activation_max);
            output_data[calcOffset(output_shape, batch, out_y, out_x, output_channel)] =
              static_cast<uint8_t>(scaled_acc);
          }
        }
      }
    }
  }
}
+
+void DepthwiseConv2D::evalQuantized() const
+{
+ const auto input_scale = static_cast<double>(input()->scale());
+ const auto filter_scale = static_cast<double>(filter()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
+ const double real_multiplier = input_scale * filter_scale / output_scale;
+ int32_t output_multiplier{};
+ int output_shift{};
+ quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::DepthwiseParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+ params.depth_multiplier = _params.depth_multiplier;
+ // The kernel expects input and filter zero points to be negated.
+ params.input_offset = -input()->zero_point(); // Note the '-'.
+ params.weights_offset = -filter()->zero_point(); // Note the '-'.
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = output_multiplier;
+ params.output_shift = output_shift;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ tflite::reference_ops::DepthwiseConv(
+ params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
+ getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+void DepthwiseConv2D::evalQuantizedS8PerChannel() const
+{
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::DepthwiseParams params{};
+
+ params.padding_type = tflite::PaddingType::kSame;
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+ params.depth_multiplier = _params.depth_multiplier;
+ // The kernel expects input and filter zero points to be negated.
+ params.input_offset = -input()->zero_point(); // Note the '-'.
+ params.weights_offset = 0;
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = 1; // unused in tflite code
+ params.output_shift = 0; // unused in tflite code
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ const std::vector<double> effective_output_scales =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ std::vector<ChannelQuantMultipliers> quant_multipliers =
+ quantizeMultipliers(effective_output_scales);
+
+ std::vector<int32_t> shifts;
+ std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts),
+ [](ChannelQuantMultipliers cm) { return cm.shift; });
+ std::vector<int32_t> multipliers;
+ std::transform(quant_multipliers.begin(), quant_multipliers.end(),
+ std::back_inserter(multipliers),
+ [](ChannelQuantMultipliers cm) { return cm.multiplier; });
+
+ auto scratchpad = getOutputTensors()[1];
+ int8_t *scratchpad_data = nullptr;
+ if (scratchpad->is_allocatable())
+ scratchpad_data = scratchpad->data<int8_t>();
+
+ luci_interpreter_pal::DepthwiseConvPerChannel<int8_t>(
+ params, multipliers.data(), shifts.data(), getTensorShape(input()),
+ getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
+ getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
+ getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
+}
+
// Hand-written S16 reference path with per-channel (or broadcast layer-wise)
// filter scales. Accumulation happens in int64 and the bias is int64.
// Input/filter zero points are not subtracted here, and no output zero point
// is added (the values are used as-is).
void DepthwiseConv2D::evalQuantizedS16() const
{
  const auto *input_data = getTensorData<int16_t>(input());
  const auto *filter_data = getTensorData<int16_t>(filter());
  const auto *bias_data = getTensorData<int64_t>(bias());
  auto *output_data = getTensorData<int16_t>(output());

  const Shape &input_shape = input()->shape();
  const Shape &filter_shape = filter()->shape();
  const Shape &output_shape = output()->shape();

  const int32_t batches = input_shape.dim(0);
  const int32_t input_height = input_shape.dim(1);
  const int32_t input_width = input_shape.dim(2);
  const int32_t input_depth = input_shape.dim(3);
  const int32_t filter_height = filter_shape.dim(1);
  const int32_t filter_width = filter_shape.dim(2);
  const int32_t output_height = output_shape.dim(1);
  const int32_t output_width = output_shape.dim(2);

  const int32_t stride_height = _params.stride_height;
  const int32_t stride_width = _params.stride_width;
  const int32_t dilation_height_factor = _params.dilation_height_factor;
  const int32_t dilation_width_factor = _params.dilation_width_factor;
  const int32_t depth_multiplier = _params.depth_multiplier;

  // Per-output-channel requantization parameters; BroadcastableWrapper lets a
  // single layer-wise pair serve every channel index.
  const std::vector<double> effective_output_scales =
    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());

  std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
    quantizeMultipliers(effective_output_scales);

  BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);

  int32_t activation_min{};
  int32_t activation_max{};
  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);

  for (int32_t batch = 0; batch < batches; ++batch)
  {
    for (int32_t out_y = 0; out_y < output_height; ++out_y)
    {
      for (int32_t out_x = 0; out_x < output_width; ++out_x)
      {
        for (int32_t in_c = 0; in_c < input_depth; ++in_c)
        {
          for (int32_t m = 0; m < depth_multiplier; ++m)
          {
            // Each input channel feeds depth_multiplier output channels.
            const int32_t out_c = m + in_c * depth_multiplier;
            const int32_t in_y_origin = out_y * stride_height - _padding_height;
            const int32_t in_x_origin = out_x * stride_width - _padding_width;
            int64_t acc = 0;
            for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
            {
              for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
              {
                const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
                const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
                // Zero padding: taps outside the input are skipped.
                if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
                {
                  const int16_t input_val =
                    input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
                  const int16_t filter_val =
                    filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, out_c)];
                  acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
                }
              }
            }
            if (bias_data != nullptr)
            {
              acc += bias_data[out_c];
            }

            // Requantize to the output scale, then clamp to the
            // fused-activation range.
            int32_t output_multiplier = quant_multipliers[out_c].multiplier;
            int output_shift = quant_multipliers[out_c].shift;
            int32_t scaled_acc =
              tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);

            scaled_acc = std::max(scaled_acc, activation_min);
            scaled_acc = std::min(scaled_acc, activation_max);

            output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
          }
        }
      }
    }
  }
}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h
new file mode 100644
index 000000000..3d1faf6c1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_KERNELS_DEPTHWISECONV2D_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Depthwise 2D convolution kernel.
// Inputs: {input, filter, bias}; outputs: {output, scratchpad}, where the
// scratchpad is an implementation buffer set up during configure().
class DepthwiseConv2D : public KernelWithParams<DepthwiseConv2DParams>
{
public:
  DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
                  Tensor *scratchpad, const DepthwiseConv2DParams &params);

  // Accessors for the tensors registered with the base kernel.
  const Tensor *input() const { return _inputs[0]; }
  const Tensor *filter() const { return _inputs[1]; }
  const Tensor *bias() const { return _inputs[2]; }
  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;

private:
  // One evaluation routine per supported dtype/quantization combination.
  void evalFloat() const;
  void evalQuantized() const;
  void evalQuantizedPerChannel() const;
  void evalQuantizedS8PerChannel() const;
  void evalQuantizedS16() const;

private:
  // Explicit per-axis paddings computed in configure() from the padding mode.
  int32_t _padding_height{};
  int32_t _padding_width{};
};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DEPTHWISECONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
new file mode 100644
index 000000000..6b4673f3e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
@@ -0,0 +1,622 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/DepthwiseConv2D.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
// Test fixture: provides a fresh TestMemoryManager for every test case.
class DepthwiseConv2DTest : public ::testing::Test
{
protected:
  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }

  std::unique_ptr<IMemoryManager> _memory_manager;
};
+
TEST_F(DepthwiseConv2DTest, Float)
{
  // Float path: depth_multiplier 2 maps the 2 input channels onto 4 output
  // channels; RELU clamps negative accumulations to zero.
  Shape input_shape{1, 4, 2, 2};
  Shape filter_shape{1, 2, 2, 4};
  Shape bias_shape{4};
  std::vector<float> input_data{
    1,  2,  7,  8,  //
    3,  4,  9,  10, //
    5,  6,  11, 12, //
    13, 14, 15, 16, //
  };
  std::vector<float> filter_data{
    1,  2,   3,   4,   //
    -9, 10,  -11, 12,  //
    5,  6,   7,   8,   //
    13, -14, 15,  -16, //
  };
  std::vector<float> bias_data{1, 2, 3, 4};
  Tensor input_tensor =
    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
  Tensor filter_tensor =
    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
  Tensor bias_tensor =
    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);

  DepthwiseConv2DParams params{};
  params.padding = Padding::VALID;
  params.depth_multiplier = 2;
  params.stride_height = 2;
  params.stride_width = 1;
  params.dilation_height_factor = 1;
  params.dilation_width_factor = 1;
  params.activation = Activation::RELU;

  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
                         params);
  kernel.configure();
  // Output and scratchpad sizes are known only after configure().
  _memory_manager->allocate_memory(scratchpad);
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  std::vector<float> ref_output_data{
    71,  0, 99,  0,  //
    167, 0, 227, 28, //
  };
  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4}));
}
+
TEST_F(DepthwiseConv2DTest, Uint8)
{
  // Layer-wise (single scale) U8 quantization; the filter reuses the input's
  // quantization parameters, so the bias scale is input_scale^2.
  std::vector<float> input_data{
    1, 2, 7,  8,  // column 1
    3, 4, 9,  10, // column 2
    5, 6, 11, 12, // column 3
  };
  std::vector<float> filter_data{
    1,  2,   3,   4,   //
    -9, 10,  -11, 12,  //
    5,  6,   7,   8,   //
    13, -14, 15,  -16, //
  };
  std::vector<float> bias_data{1, 2, 3, 4};

  std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
  std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);

  Tensor input_tensor =
    makeInputTensor<DataType::U8>({1, 3, 2, 2}, input_quant_param.first, input_quant_param.second,
                                  input_data, _memory_manager.get());
  Tensor filter_tensor =
    makeInputTensor<DataType::U8>({1, 2, 2, 4}, input_quant_param.first, input_quant_param.second,
                                  filter_data, _memory_manager.get());
  // Bias scale = input_scale * filter_scale (identical here), zero point 0.
  Tensor bias_tensor = makeInputTensor<DataType::S32>(
    {4}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get());
  Tensor output_tensor =
    makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");

  DepthwiseConv2DParams params{};
  params.padding = Padding::VALID;
  params.depth_multiplier = 2;
  params.stride_height = 1;
  params.stride_width = 1;
  params.dilation_height_factor = 1;
  params.dilation_width_factor = 1;
  params.activation = Activation::NONE;

  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
                         params);
  kernel.configure();
  // Output and scratchpad sizes are known only after configure().
  _memory_manager->allocate_memory(output_tensor);
  _memory_manager->allocate_memory(scratchpad);
  kernel.execute();

  std::vector<float> ref_output_data{
    71, -34, 99,  -20, //
    91, -26, 127, -4,  //
  };
  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4}));
}
+
TEST_F(DepthwiseConv2DTest, SInt16)
{
  // S16 path with single (layer-wise) scales; bias is S64 and the scratchpad
  // dtype is S64 as well.
  Shape input_shape{1, 4, 2, 2};
  Shape filter_shape{1, 2, 2, 4};
  Shape bias_shape{4};
  std::vector<int32_t> ref_output_shape{1, 2, 1, 4};

  std::vector<float> input_data{
    1,  2,  7,  8,  //
    3,  4,  9,  10, //
    5,  6,  11, 12, //
    13, 14, 15, 16, //
  };
  std::vector<float> filter_data{
    1,  2,   3,   4,   //
    -9, 10,  -11, 12,  //
    5,  6,   7,   8,   //
    13, -14, 15,  -16, //
  };
  std::vector<float> bias_data{1, 2, 3, 4};
  std::vector<float> ref_output_data{
    71,  0, 99,  0,  //
    167, 0, 227, 28, //
  };

  Tensor input_tensor =
    makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data, _memory_manager.get());
  Tensor filter_tensor =
    makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data, _memory_manager.get());
  // Bias scale = input_scale * filter_scale.
  Tensor bias_tensor =
    makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
  Tensor scratchpad(DataType::S64, Shape({}), {}, "");

  DepthwiseConv2DParams params{};
  params.padding = Padding::VALID;
  params.depth_multiplier = 2;
  params.stride_height = 2;
  params.stride_width = 1;
  params.dilation_height_factor = 1;
  params.dilation_width_factor = 1;
  params.activation = Activation::RELU;

  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
                         params);
  kernel.configure();
  // Output and scratchpad sizes are known only after configure().
  _memory_manager->allocate_memory(output_tensor);
  _memory_manager->allocate_memory(scratchpad);
  kernel.execute();

  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}
+
TEST_F(DepthwiseConv2DTest, SInt16_CWQ_weights)
{
  // S16 path with channel-wise (CWQ) filter scales: one scale per output
  // channel on filter axis 3, bias quantized per channel on axis 0.
  const int output_channels = 4;
  Shape input_shape{1, 4, 2, 2};
  Shape filter_shape{1, 2, 2, output_channels};
  Shape bias_shape{4};
  std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels};

  std::vector<float> input_data{
    1,  2,  7,  8,  //
    3,  4,  9,  10, //
    5,  6,  11, 12, //
    13, 14, 15, 16, //
  };
  std::vector<float> filter_data{
    1,  2,   3,   4,   //
    -9, 10,  -11, 12,  //
    5,  6,   7,   8,   //
    13, -14, 15,  -16, //
  };
  std::vector<float> bias_data{1, 2, 3, 4};
  std::vector<float> ref_output_data{
    71,  0, 99,  0,  //
    167, 0, 227, 28, //
  };

  float input_scale = 0.25;
  std::vector<float> filter_scales{0.2f, 1.f, 0.5f, 0.1f};
  // Per-channel bias scale = input_scale * per-channel filter scale.
  std::vector<float> bias_scales;
  for (int i = 0; i < output_channels; ++i)
    bias_scales.push_back(filter_scales[i] * input_scale);
  std::vector<int32_t> zerop(4, 0); // symmetric: all zero points are 0
  Tensor input_tensor =
    makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, _memory_manager.get());
  Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 3,
                                                        filter_data, _memory_manager.get());
  Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
                                                      _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
  Tensor scratchpad(DataType::S16, Shape({}), {}, "");

  DepthwiseConv2DParams params{};
  params.padding = Padding::VALID;
  params.depth_multiplier = 2;
  params.stride_height = 2;
  params.stride_width = 1;
  params.dilation_height_factor = 1;
  params.dilation_width_factor = 1;
  params.activation = Activation::RELU;

  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
                         params);
  kernel.configure();
  // Output and scratchpad sizes are known only after configure().
  _memory_manager->allocate_memory(output_tensor);
  _memory_manager->allocate_memory(scratchpad);
  kernel.execute();

  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}
+
TEST_F(DepthwiseConv2DTest, Uint8_CWQ_weights)
{
  // U8 path with channel-wise filter quantization: per-channel (scale, zero
  // point) pairs on filter axis 3; exercises evalQuantizedPerChannel().
  const int output_channels = 4;
  Shape input_shape{1, 3, 2, 2};
  Shape filter_shape{1, 2, 2, output_channels};
  Shape bias_shape{4};
  std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels};

  std::vector<float> input_data{
    1, 2, 7,  8,  //
    3, 4, 9,  10, //
    5, 6, 11, 12, //
  };
  std::vector<float> filter_data{
    1,  2,   3,   4,   //
    -9, 10,  -11, 12,  //
    5,  6,   7,   8,   //
    13, -14, 15,  -16, //
  };
  std::vector<float> bias_data{1, 2, 3, 4};
  std::vector<float> ref_output_data{
    71, -34, 99,  -20, //
    91, -26, 127, -4,  //
  };

  std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(0, 16);
  std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);

  // Per-channel ranges chosen from each channel's min/max filter values.
  std::vector<std::pair<float, int32_t>> filter_quant_params;
  filter_quant_params.push_back(quantizationParams<uint8_t>(-9, 13));
  filter_quant_params.push_back(quantizationParams<uint8_t>(-14, 10));
  filter_quant_params.push_back(quantizationParams<uint8_t>(-11, 15));
  filter_quant_params.push_back(quantizationParams<uint8_t>(-16, 12));

  std::vector<float> filter_scales;
  std::vector<int32_t> filter_zerops;
  for (auto iter : filter_quant_params)
  {
    filter_scales.push_back(iter.first);
    filter_zerops.push_back(iter.second);
  }

  // Per-channel bias scale = per-channel filter scale * input scale.
  std::vector<float> bias_scales;
  for (int i = 0; i < output_channels; ++i)
    bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
  std::vector<int32_t> zerop(output_channels, 0);

  Tensor input_tensor =
    makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
                                  input_data, _memory_manager.get());
  Tensor filter_tensor = makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops,
                                                       3, filter_data, _memory_manager.get());
  Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data,
                                                      _memory_manager.get());
  Tensor output_tensor =
    makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
  Tensor scratchpad(DataType::U8, Shape({}), {}, "");

  DepthwiseConv2DParams params{};
  params.padding = Padding::VALID;
  params.depth_multiplier = 2;
  params.stride_height = 1;
  params.stride_width = 1;
  params.dilation_height_factor = 1;
  params.dilation_width_factor = 1;
  params.activation = Activation::NONE;

  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
                         params);
  kernel.configure();
  // Output and scratchpad sizes are known only after configure().
  _memory_manager->allocate_memory(output_tensor);
  _memory_manager->allocate_memory(scratchpad);
  kernel.execute();

  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
  // Tolerance of one output quantization step.
  EXPECT_THAT(dequantizeTensorData(output_tensor),
              FloatArrayNear(ref_output_data, output_quant_param.first));
}
+
+TEST_F(DepthwiseConv2DTest, SInt8_CWQ_weights)
+{
+ const int output_channels = 4;
+ Shape input_shape{1, 3, 2, 2};
+ Shape filter_shape{1, 2, 2, output_channels};
+ Shape bias_shape{4};
+ std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels};
+
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ std::vector<float> ref_output_data{
+ 71, -34, 99, -20, //
+ 91, -26, 127, -4, //
+ };
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-128, 127);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
+
+ std::vector<std::pair<float, int32_t>> filter_quant_params;
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.5, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.25, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(1, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.125, 0));
+
+ std::vector<float> filter_scales;
+ std::vector<int32_t> filter_zerops;
+ for (auto iter : filter_quant_params)
+ {
+ filter_scales.push_back(iter.first);
+ filter_zerops.push_back(iter.second);
+ }
+
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+ std::vector<int32_t> zerop(output_channels, 0);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::S8>(filter_shape, filter_scales, filter_zerops,
+ 3, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data,
+ _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+ Tensor scratchpad(DataType::S8, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::NONE;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, output_quant_param.first));
+}
+
+TEST_F(DepthwiseConv2DTest, InvalidBiasType_NEG)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 2, 4};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<int32_t> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DepthwiseConv2DTest, InOutTypeMismatch_NEG)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 2, 4};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DepthwiseConv2DTest, InvalidInputShape_NEG)
+{
+ Shape input_shape{4, 2, 2};
+ Shape filter_shape{2, 2, 4};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DepthwiseConv2DTest, InvalidFilterShape_NEG)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{2, 1, 2, 4};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DepthwiseConv2DTest, InvalidBiasDim_NEG)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 4, 2};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.cpp
new file mode 100644
index 000000000..96399e5c7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Dequantize.h"
+#include "kernels/Utils.h"
+#include "PALDequantize.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Dequantize::Dequantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Dequantize::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::S8 ||
+ input()->element_type() == loco::DataType::U8 ||
+ input()->element_type() == loco::DataType::S16);
+
+ LUCI_INTERPRETER_CHECK(input()->scales().size() == 1);
+
+ if (input()->element_type() == loco::DataType::S16)
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);
+
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32);
+
+ output()->resize(input()->shape());
+}
+
+void Dequantize::execute() const
+{
+ tflite::DequantizationParams op_params;
+ op_params.zero_point = input()->zero_point();
+ op_params.scale = input()->scale();
+
+ switch (input()->element_type())
+ {
+ case loco::DataType::U8:
+ {
+ luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ case loco::DataType::S8:
+ {
+ luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+ getTensorData<int8_t>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ case loco::DataType::S16:
+ {
+ luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+ getTensorData<int16_t>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.h b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.h
new file mode 100644
index 000000000..5565df0e4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
+#define LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Dequantize : public Kernel
+{
+public:
+ Dequantize(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.test.cpp
new file mode 100644
index 000000000..0cab633d6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.test.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Dequantize.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class DequantizeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(DequantizeTest, Uint8)
+{
+ std::vector<uint8_t> input_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255};
+
+ std::vector<float> ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ Tensor input_tensor(loco::DataType::U8, {2, 5}, {{0.5}, {127}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(uint8_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, Sint8)
+{
+ std::vector<int8_t> input_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127};
+
+ std::vector<float> ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ Tensor input_tensor(loco::DataType::S8, {2, 5}, {{0.5}, {-1}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int8_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, Sint16)
+{
+ std::vector<int16_t> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+ std::vector<float> ref_output_data{-64.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 65.5};
+
+ Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, InvalidInputType_NEG)
+{
+ std::vector<float> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DequantizeTest, InvalidOutputType_NEG)
+{
+ std::vector<int16_t> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+ Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DequantizeTest, InvalidInputZeroPoint_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({2, 5}, 0.5, -1, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Div.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Div.cpp
new file mode 100644
index 000000000..dd1532278
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Div.cpp
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Div.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/div.h>
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Div::Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams &params)
+ : KernelWithParams<DivParams>({input1, input2}, {output}, params)
+{
+}
+
+void Div::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Div::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Div::evalFloat() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<float>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastDivSlow(
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<float>(input1()),
+ getTensorShape(input2()), getTensorData<float>(input2()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ }
+}
+
+template <typename T> void Div::evalInteger() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<T>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastDivSlow(
+ params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+ getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
+void Div::evalQuantized() const
+{
+ const auto input1_scale = static_cast<double>(input1()->scale());
+ const auto input2_scale = static_cast<double>(input2()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
+ const double real_output_multiplier = input1_scale / (input2_scale * output_scale);
+
+ int32_t output_multiplier{};
+ int output_shift{};
+
+ quantizeMultiplier(real_output_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::ArithmeticParams params{};
+
+ params.input1_offset = -input1()->zero_point(); // Note the '-'.
+ params.input2_offset = -input2()->zero_point(); // Note the '-'.
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = output_multiplier;
+ params.output_shift = output_shift;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastDivSlow(
+ params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+ getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
+ getTensorShape(input2()), getTensorData<uint8_t>(input2()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Div.h b/compiler/luci-micro/luci-interpreter/src/kernels/Div.h
new file mode 100644
index 000000000..c1bf3e10b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Div.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DIV_H
+#define LUCI_INTERPRETER_KERNELS_DIV_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Div : public KernelWithParams<DivParams>
+{
+public:
+ Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams &params);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DIV_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Div.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Div.test.cpp
new file mode 100644
index 000000000..85cd8b90a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Div.test.cpp
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Div.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class DivTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+float GetTolerance(float min, float max)
+{
+ const float kQuantizedStep = (max - min) / 255.0f;
+ const float kQuantizedTolerance = 2.0f * kQuantizedStep + kQuantizedStep * kQuantizedStep;
+ return kQuantizedTolerance;
+}
+
+TEST_F(DivTest, Float)
+{
+ Shape base_shape = {2, 3, 1, 1};
+
+ std::vector<int32_t> output_shape = {2, 3, 1, 1};
+
+ std::vector<float> input1_data{0.3f, 2.3f, 0.9f, 0.5f, 0.8f, 1.1f};
+ std::vector<float> input2_data{0.2f, 1.6f, 0.5f, 0.4f, 1.6f, 0.4f};
+ std::vector<float> test_outputs{1.5f, 1.4375f, 1.8f, 1.25f, 0.5f, 2.75f};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input2_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST_F(DivTest, FloatBroadcast)
+{
+ Shape input1_shape = {1, 3};
+ Shape input2_shape = {3, 1};
+
+ std::vector<float> input1_data{-0.3f, 2.3f, 0.9f};
+ std::vector<float> input2_data{0.2f, 1.6f, 0.5f};
+ std::vector<float> test_outputs{0.f, 11.5f, 4.5f, 0.f, 1.4375f, 0.5625f, 0.f, 4.6f, 1.8f};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
+}
+
+TEST_F(DivTest, Uint8)
+{
+ Shape base_shape = {1, 2, 2, 1};
+
+ std::vector<int32_t> output_shape = {1, 2, 2, 1};
+
+ std::vector<float> input1_data = {-0.8f, -0.2f, 0.3f, 0.7f};
+ std::vector<float> input2_data = {-0.8f, 0.4f, 0.8f, 1.0f};
+ std::vector<float> test_outputs{1.0f, 0.f, 0.375f, 0.7f};
+
+ const float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.f, 1.f);
+
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, input1_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, input2_data, _memory_manager.get());
+
+ Tensor output_tensor =
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(test_outputs, kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <loco::DataType DType> void checkInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+
+ std::vector<std::vector<dtype>> test_outputs = {{5, 6, 2, 0, 10, 3, //
+ 10, 0, 4, 5, 20, 0, //
+ 0, 0, 0, 2, 0, 0, //
+ 2, 0, 1, 10, 5, 0, //
+ 2, 3, 1, 0, 5, 1, //
+ 18, 20, 7, 0, 37, 10},
+ {5, 6, 4, 5, 0, 0, 2, 0, 1, 0, 37, 10},
+ {5, 7, 4, 6, 2, 3, 10, 0, 8, 0, 4, 0,
+ 0, 0, 0, 0, 0, 0, 0, 10, 5, 0, 1, 0,
+ 0, 0, 5, 9, 1, 1, 0, 0, 37, 50, 7, 10},
+ {5, 7, 8, 0, 0, 0, 0, 10, 5, 9, 7, 10}};
+ std::vector<dtype> input1_data{20, 30, 40, -17, -4, -7, 11, -31, 10, 19, 75, 100};
+ std::vector<dtype> input2_data{4, 5, 10, -3, 2, 10};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+}
+
+TEST_F(DivTest, SInt64)
+{
+ checkInteger<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(DivTest, SInt32)
+{
+ checkInteger<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(DivTest, Input_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DivTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U64);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(DivTest, Invalid_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Elu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.cpp
new file mode 100644
index 000000000..697d63be4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Elu.h"
+#include "kernels/Utils.h"
+
+#include "PALElu.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Single-input, single-output elementwise kernel.
+Elu::Elu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Elu::configure()
+{
+ // ELU preserves both the element type and the shape of its input.
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ output()->resize(input()->shape());
+}
+
+void Elu::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ // Delegate to the platform abstraction layer (PAL) implementation.
+ luci_interpreter_pal::Elu(getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ default:
+ // Only FLOAT32 is implemented for this kernel.
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Elu.h b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.h
new file mode 100644
index 000000000..c844ab57f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ELU_H
+#define LUCI_INTERPRETER_KERNELS_ELU_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Elementwise ELU activation kernel; FLOAT32 only (see Elu.cpp execute()).
+class Elu : public Kernel
+{
+public:
+ Elu(const Tensor *input, Tensor *output);
+
+ // Accessors for the single input / output tensor.
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ELU_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Elu.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.test.cpp
new file mode 100644
index 000000000..814499cdb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.test.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Elu.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Runs the Elu kernel over FLOAT32 input and verifies both the inferred
+// output shape and the output contents.
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Elu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ // output_shape used to be accepted but ignored ((void)output_shape);
+ // assert that configure() actually resized the output as expected.
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+}
+
+// ELU behaves as identity for x >= 0 and exp(x) - 1 for x < 0; the expected
+// values below follow that formula.
+TEST(EluTest, SimpleElu)
+{
+ Check(
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 0, -6, 2, -4, //
+ 3, -2, 10, -0.1, //
+ },
+ /*output_data=*/
+ {
+ 0.0, -0.997521, 2.0, -0.981684, //
+ 3.0, -0.864665, 10.0, -0.0951626, //
+ });
+}
+
+// Negative test: configure() must reject an output element type (U8) that
+// differs from the FLOAT32 input.
+TEST(EluTest, InOutTypeMismatch_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, -6, 2, -4, //
+ 3, -2, 10, -0.1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Elu kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Equal.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.cpp
new file mode 100644
index 000000000..a57e127b7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Equal.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Two inputs (x, y) compared element-wise; one boolean output.
+Equal::Equal(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+void Equal::configure()
+{
+ // Inputs must share an element type; the comparison result is always BOOL.
+ LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+ if (x()->element_type() == DataType::U8)
+ {
+ // Pre-compute fixed-point multiplier/shift pairs from the quantization
+ // scales so evalQuantized() can compare without floating-point math.
+ quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+ quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+ }
+ // NOTE(review): calculateShapeForBroadcast presumably throws on
+ // incompatible shapes — the *_Broadcast_NEG tests rely on that; confirm.
+ output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void Equal::execute() const
+{
+ // Dispatch on the (already validated) common input element type.
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Equal::evalFloat() const
+{
+ const auto x_data = getTensorData<float>(x());
+ const auto y_data = getTensorData<float>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ // Unequal input shapes imply broadcasting; pick the matching TFLite
+ // reference implementation accordingly.
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowEqual(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::Equal(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+ y_data, getTensorShape(output()), output_data);
+ }
+}
+
+// Integer comparison uses the "NoScaling" variants: values are compared
+// directly, without the quantization rescaling applied in evalQuantized().
+template <typename T> void Equal::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::EqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+// Quantized (U8) comparison: both operands are rescaled with the
+// multiplier/shift pairs computed in configure() before comparing.
+void Equal::evalQuantized() const
+{
+ const auto x_data = getTensorData<uint8_t>(x());
+ const auto y_data = getTensorData<uint8_t>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.left_shift = 8;
+ op_params.input1_offset = -x()->zero_point(); // Note the '-'
+ op_params.input1_shift = _x_shift;
+ op_params.input1_multiplier = _x_multiplier;
+ op_params.input2_offset = -y()->zero_point(); // Note the '-'
+ op_params.input2_shift = _y_shift;
+ op_params.input2_multiplier = _y_multiplier;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowEqualWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::EqualWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Equal.h b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.h
new file mode 100644
index 000000000..c9be32cc0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_EQUAL_H
+#define LUCI_INTERPRETER_KERNELS_EQUAL_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Elementwise x == y comparison producing a BOOL tensor. Supports
+// broadcasting and FLOAT32 / S32 / S64 / quantized U8 inputs (see Equal.cpp).
+class Equal : public Kernel
+{
+public:
+ Equal(const Tensor *x, const Tensor *y, Tensor *output);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ // Per-type evaluation paths selected in execute().
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+
+private:
+ // Fixed-point rescaling parameters computed in configure() for U8 inputs.
+ int32_t _x_multiplier = 0;
+ int _x_shift = 0;
+ int32_t _y_multiplier = 0;
+ int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_EQUAL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Equal.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.test.cpp
new file mode 100644
index 000000000..5870e5460
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.test.cpp
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Equal.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Fixture providing a fresh TestMemoryManager for every test case.
+class EqualTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Element-wise comparison with identical shapes (no broadcasting).
+TEST_F(EqualTest, FloatSimple)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, // Row 1
+ false, true, false, // Row 2
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+// Broadcasting comparison: y (1x3) is broadcast against every row of x (4x3).
+// (Test name fixed: was misspelled "FloatBroardcast".)
+TEST_F(EqualTest, FloatBroadcast)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
+ 0.9, 0.7, 0.5, // Row 4
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, // Row 1
+ false, false, false, // Row 2
+ false, false, false, // Row 3
+ true, true, true, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+// Same-shape integer comparison exercising the type's extreme values
+// (numeric_limits min/max) to catch overflow-sensitive comparisons.
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value, -2, max_value};
+
+ std::vector<bool> ref_output_data{true, false, true};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+// Integer comparison with broadcasting: y ({3}) is compared against each
+// row of x ({4, 3}).
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -2, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value, -2, max_value, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, false, // Row 1
+ false, false, true, // Row 2
+ false, true, false, // Row 3
+ true, true, true, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+// Runs both integer helpers for S32.
+TEST_F(EqualTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+// Runs both integer helpers for S64.
+TEST_F(EqualTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+// Quantized comparison where x and y use *different* quantization params
+// (y's range is twice x's), exercising the rescaling path in evalQuantized().
+TEST_F(EqualTest, Uint8Quantized)
+{
+ std::vector<float> x_data{
+ 0.5, 0.5, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.5, 0.55, 0.5, // Row 1
+ -1, 0, 0.05, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, false, // Row 1
+ false, true, true, false, // Row 2
+ };
+
+ std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+
+ std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2);
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+// Quantized comparison with broadcasting (shared quantization params):
+// y (1x1x4x1) is broadcast against each row of x (1x4x4x1).
+TEST_F(EqualTest, Uint8QuantizedBroadcast)
+{
+ std::vector<float> x_data{
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
+ -1, 0.05, 0, 1, // Row 4
+ };
+
+ std::vector<float> y_data{
+ -1, 0.05, 0, 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, false, false, false, // Row 1
+ false, false, true, false, // Row 2
+ false, false, false, false, // Row 3
+ true, true, true, true, // Row 4
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 4, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+// Negative test: configure() must reject mismatched input element types.
+TEST_F(EqualTest, Input_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative test: the output must be BOOL; FLOAT32 output is rejected.
+TEST_F(EqualTest, Input_Output_Type_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative tests: shapes {2} and {3} are not broadcast-compatible, so
+// configure() must throw for every supported element type.
+TEST_F(EqualTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(EqualTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(EqualTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Exp.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.cpp
new file mode 100644
index 000000000..e7c560a88
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Exp.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/exp.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Single-input, single-output elementwise kernel.
+Exp::Exp(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Exp::configure()
+{
+ // Output mirrors the input: same element type, same shape.
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ output()->resize(input()->shape());
+}
+
+void Exp::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ // Only FLOAT32 is implemented for this kernel.
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Exp::evalFloat() const
+{
+ // MatchingFlatSize yields the flat element count shared by input and
+ // output (presumably asserting the shapes match — see TFLite docs).
+ const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+ tflite::reference_ops::Exp(getTensorData<float>(input()), size, getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Exp.h b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.h
new file mode 100644
index 000000000..429177375
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_EXP_H
+#define LUCI_INTERPRETER_KERNELS_EXP_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Elementwise exponential kernel; FLOAT32 only (see Exp.cpp execute()).
+class Exp : public Kernel
+{
+public:
+ Exp(const Tensor *input, Tensor *output);
+
+ // Accessors for the single input / output tensor.
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_EXP_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Exp.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.test.cpp
new file mode 100644
index 000000000..a159d9db9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.test.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Exp.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Compares the kernel against std::exp element by element.
+// NOTE(review): std::exp(100.0f) overflows float to +inf; both the reference
+// and the kernel compute in float, so the values should still agree — confirm
+// FloatArrayNear treats matching infinities as equal.
+TEST(ExpTest, Float)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Shape input_shape{1, 1, 7};
+ std::vector<float> input_data{0.0f, 1.0f, -1.0f, 100.0f, -100.0f, 0.01f, -0.01f};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Exp kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<int32_t> ref_output_shape{1, 1, 7};
+ std::vector<float> ref_output_data{std::exp(0.0f), std::exp(1.0f), std::exp(-1.0f),
+ std::exp(100.0f), std::exp(-100.0f), std::exp(0.01f),
+ std::exp(-0.01f)};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.cpp
new file mode 100644
index 000000000..ba35c99fa
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ExpandDims.h"
+#include "kernels/Utils.h"
+
+#include <cstring>   // std::memcpy (used in ExpandDims::execute)
+#include <stdexcept> // std::runtime_error (used in ExpandDims::configure)
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Inputs: the tensor to reshape and a scalar axis tensor; one output.
+ExpandDims::ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output)
+ : Kernel({input, axis}, {output})
+{
+}
+
+void ExpandDims::configure()
+{
+ int32_t axis_value;
+
+ // The axis tensor must be S32 or S64; S64 is narrowed to 32 bits.
+ switch (axis()->element_type())
+ {
+ case loco::DataType::S32:
+ axis_value = *getTensorData<int32_t>(axis());
+ break;
+ case loco::DataType::S64:
+ axis_value = static_cast<int32_t>(*getTensorData<int64_t>(axis()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+
+ const auto input_shape = input()->shape();
+
+ // Negative axes count from the end of the *output* rank (num_dims + 1),
+ // so -1 means "append a trailing dimension".
+ if (axis_value < 0)
+ {
+ axis_value += input_shape.num_dims() + 1;
+ }
+
+ LUCI_INTERPRETER_CHECK(axis_value <= input_shape.num_dims() and axis_value >= 0);
+
+ // Build the output shape: copy input dims, inserting a 1 at axis_value.
+ Shape output_shape(input_shape.num_dims() + 1);
+ for (int32_t i = 0; i < output_shape.num_dims(); ++i)
+ {
+ if (i < axis_value)
+ {
+ output_shape.dim(i) = input_shape.dim(i);
+ }
+ else if (i == axis_value)
+ {
+ output_shape.dim(i) = 1;
+ }
+ else
+ {
+ LUCI_INTERPRETER_CHECK(i >= 1);
+ output_shape.dim(i) = input_shape.dim(i - 1);
+ }
+ }
+
+ output()->resize(output_shape);
+}
+
+void ExpandDims::execute() const
+{
+ // Just copy input to output
+ // (only the rank changes; the flat element data is identical).
+ const auto *input_data = input()->data<void>();
+ auto *output_data = output()->data<void>();
+
+ const size_t element_size = getDataTypeSize(input()->element_type());
+ const int32_t num_elements = input()->shape().num_elements();
+ std::memcpy(output_data, input_data, num_elements * element_size);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.h b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.h
new file mode 100644
index 000000000..e510b1160
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
+#define LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Inserts a size-1 dimension at the position given by the scalar axis
+// tensor (S32/S64, negative values allowed — see ExpandDims.cpp).
+class ExpandDims : public Kernel
+{
+public:
+ ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *axis() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp
new file mode 100644
index 000000000..df9eaccc0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ExpandDims.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Test fixture: provides a fresh TestMemoryManager for each test case.
+class ExpandDimsTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// A non-negative axis inserts the new unit dimension counting from the
+// front: shape {2, 2} with axis 0 becomes {1, 2, 2}; data is unchanged.
+TEST_F(ExpandDimsTest, PositiveAxis)
+{
+  std::vector<int32_t> input_data{-1, 1, -2, 2};
+  std::initializer_list<int32_t> input_shape = {2, 2};
+
+  std::initializer_list<int32_t> axis_value = {0};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2}));
+}
+
+// A negative axis counts from the back: shape {2, 2} with axis -1 becomes
+// {2, 2, 1}.
+TEST_F(ExpandDimsTest, NegAxis)
+{
+  std::vector<int32_t> input_data{-1, 1, -2, 2};
+  std::initializer_list<int32_t> input_shape = {2, 2};
+
+  std::initializer_list<int32_t> axis_value = {-1};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 1}));
+}
+
+// A FLOAT32 axis tensor must be rejected at configure() time — axis is
+// expected to be of integral type.
+TEST_F(ExpandDimsTest, InvalidAxisType_NEG)
+{
+  std::vector<int32_t> input_data{-1, 1, -2, 2};
+  std::initializer_list<int32_t> input_shape = {2, 2};
+
+  std::initializer_list<float> axis_value = {1.0};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::FLOAT32>({1}, axis_value, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// An axis value too large for the input rank (3 for a rank-2 input) must be
+// rejected at configure() time.
+TEST_F(ExpandDimsTest, InvalidAxisValue_NEG)
+{
+  std::vector<int32_t> input_data{-1, 1, -2, 2};
+  std::initializer_list<int32_t> input_shape = {2, 2};
+
+  std::initializer_list<int32_t> axis_value = {3};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Fill.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.cpp
new file mode 100644
index 000000000..e09d6331a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Fill.h"
+#include "kernels/Utils.h"
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// dims: 1-D tensor holding the output shape; value: scalar to replicate.
+Fill::Fill(const Tensor *dims, const Tensor *value, Tensor *output)
+  : Kernel({dims, value}, {output})
+{
+}
+
+// Reads the 1-D "dims" tensor (T is int32_t or int64_t, matching its dtype)
+// and resizes the output to that shape. Throws if any requested dimension
+// is negative.
+template <typename T> void Fill::configureShape()
+{
+  const auto dims_data = getTensorData<T>(dims());
+  Shape output_shape(dims()->shape().dim(0));
+
+  for (int i = 0; i < output_shape.num_dims(); ++i)
+  {
+    T data = dims_data[i];
+    if (data < 0)
+      throw std::runtime_error("Fill dimensions must be >= 0");
+
+    output_shape.dim(i) = data;
+  }
+
+  output()->resize(output_shape);
+}
+
+// Validates the inputs and resizes the output:
+//  - dims  : must be 1-D, of type S32 or S64
+//  - value : must be a scalar; for quantized S8/S16 values its scale and
+//            zero-point must match the output (S16 additionally requires a
+//            zero-point of 0)
+void Fill::configure()
+{
+  const auto dims_shape = dims()->shape();
+  const auto value_shape = value()->shape();
+
+  // Make sure the 1st input tensor is 1-D
+  LUCI_INTERPRETER_CHECK(dims_shape.num_dims() == 1);
+
+  // Make sure the 1st input tensor is int32 or int64
+  LUCI_INTERPRETER_CHECK(dims()->element_type() == DataType::S32 or
+                         dims()->element_type() == DataType::S64);
+
+  // Make sure the 2nd input tensor is a scalar
+  // (fixed: terminate the macro invocation with ';' like every other use in
+  // this file instead of relying on the macro expanding to a full statement)
+  LUCI_INTERPRETER_CHECK(value_shape.num_dims() == 0);
+
+  // Check zero point and scale for S16 and S8
+  if (value()->element_type() == loco::DataType::S16 or
+      value()->element_type() == loco::DataType::S8)
+  {
+    LUCI_INTERPRETER_CHECK(value()->scale() == output()->scale());
+    LUCI_INTERPRETER_CHECK(value()->zero_point() == output()->zero_point());
+
+    if (value()->element_type() == loco::DataType::S16)
+      LUCI_INTERPRETER_CHECK(value()->zero_point() == 0);
+  }
+  // Resize output
+  switch (dims()->element_type())
+  {
+    case DataType::S32:
+      configureShape<int32_t>();
+      break;
+    case DataType::S64:
+      configureShape<int64_t>();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Replicates the scalar "value" into every element of the output via the
+// TFLite reference Fill implementation, dispatched on the output dtype.
+void Fill::execute() const
+{
+  switch (output()->element_type())
+  {
+    case DataType::S8:
+      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int8_t>(value()),
+                                  getTensorShape(output()), getTensorData<int8_t>(output()));
+      break;
+    case DataType::S16:
+      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int16_t>(value()),
+                                  getTensorShape(output()), getTensorData<int16_t>(output()));
+      break;
+    case DataType::S32:
+      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int32_t>(value()),
+                                  getTensorShape(output()), getTensorData<int32_t>(output()));
+      break;
+    case DataType::S64:
+      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int64_t>(value()),
+                                  getTensorShape(output()), getTensorData<int64_t>(output()));
+      break;
+    case DataType::FLOAT32:
+      tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<float>(value()),
+                                  getTensorShape(output()), getTensorData<float>(output()));
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Fill.h b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.h
new file mode 100644
index 000000000..184f0cb83
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_FILL_H
+#define LUCI_INTERPRETER_KERNELS_FILL_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel for the Fill operation: produces a tensor whose shape is given by
+// the 1-D "dims" tensor and whose every element equals the scalar "value".
+class Fill : public Kernel
+{
+public:
+  Fill(const Tensor *dims, const Tensor *value, Tensor *output);
+
+  // Accessors over the base-class input/output tensor lists.
+  const Tensor *dims() const { return _inputs[0]; }
+  const Tensor *value() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // Resizes the output from "dims" data; T matches the dims dtype (S32/S64).
+  template <typename T> void configureShape();
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_FILL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Fill.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.test.cpp
new file mode 100644
index 000000000..cf56df507
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.test.cpp
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Fill.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Test fixture: provides a fresh TestMemoryManager for each test case.
+class FillTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Helper for plain integer dtypes: fills a {2, 3} output with the scalar 5
+// and checks both data and shape. T is the C++ element type for DataType DT.
+template <typename T, DataType DT> void runFillIntKernel(IMemoryManager *memory_manager)
+{
+  Shape dims_shape{2};
+
+  std::vector<int32_t> dims_data = {2, 3};
+  std::vector<T> value_data = {5};
+
+  Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, memory_manager);
+  Tensor value = makeInputTensor<DT>(/*scalar*/ {}, value_data, memory_manager);
+
+  Tensor output_tensor = makeOutputTensor(DT);
+
+  Fill kernel(&dims, &value, &output_tensor);
+
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<T> ref_output_data{5, 5, 5, 5, 5, 5};
+  EXPECT_THAT(extractTensorData<T>(output_tensor), ref_output_data);
+
+  std::vector<int32_t> ref_output_shape{2, 3};
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Helper for quantized dtypes (S8/S16): value and output share scale 0.25;
+// S8 uses a non-zero zero-point to exercise the zero-point checks.
+template <DataType DT> void runFillQuantIntKernel(IMemoryManager *memory_manager)
+{
+  Shape dims_shape{2};
+
+  std::vector<int32_t> dims_data = {2, 3};
+  std::vector<float> value_data = {5};
+
+  int32_t zero_point = 0;
+
+  if (DT == loco::DataType::S8)
+    zero_point = 1;
+
+  Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, memory_manager);
+  Tensor value = makeInputTensor<DT>(/*scalar*/ {}, /*scale*/ 0.25, /*zero_point*/ zero_point,
+                                     value_data, memory_manager);
+
+  Tensor output_tensor = makeOutputTensor(DT, /*scale*/ 0.25, /*zero_point*/ zero_point);
+
+  Fill kernel(&dims, &value, &output_tensor);
+
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{5, 5, 5, 5, 5, 5};
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+
+  std::vector<int32_t> ref_output_shape{2, 3};
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Covers every supported integer value dtype via the helpers above.
+TEST_F(FillTest, FillInt)
+{
+  // Run for int32_t input
+  runFillIntKernel<int32_t, loco::DataType::S32>(_memory_manager.get());
+  // Run for int64_t input
+  runFillIntKernel<int64_t, loco::DataType::S64>(_memory_manager.get());
+  // Run for int8_t input
+  runFillQuantIntKernel<loco::DataType::S8>(_memory_manager.get());
+  // Run for int16_t input
+  runFillQuantIntKernel<loco::DataType::S16>(_memory_manager.get());
+
+  SUCCEED();
+}
+
+// Float fill with S64 dims: {2, 2, 2} output, every element 5.
+TEST_F(FillTest, FillFloat)
+{
+  Shape dims_shape{3};
+
+  std::vector<int64_t> dims_data = {2, 2, 2};
+  std::vector<float> value_data = {5};
+
+  Tensor dims = makeInputTensor<loco::DataType::S64>(dims_shape, dims_data, _memory_manager.get());
+  Tensor value =
+    makeInputTensor<loco::DataType::FLOAT32>(/*scalar*/ {}, value_data, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+  Fill kernel(&dims, &value, &output_tensor);
+
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{5, 5, 5, 5, 5, 5, 5, 5};
+
+  std::vector<int32_t> ref_output_shape{2, 2, 2};
+  EXPECT_THAT(extractTensorData<float>(output_tensor), ref_output_data);
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// A 2-D "dims" tensor violates the 1-D requirement and must throw in
+// configure().
+TEST_F(FillTest, Invalid_Input_Shape_NEG)
+{
+  Shape dims_shape{1, 3};
+
+  std::vector<int32_t> dims_data = {2, 2, 2};
+  std::vector<float> value_data = {5};
+
+  Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, _memory_manager.get());
+  Tensor value =
+    makeInputTensor<loco::DataType::FLOAT32>(/*scalar*/ {}, value_data, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+  Fill kernel(&dims, &value, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// A rank-1 "value" tensor violates the scalar requirement and must throw in
+// configure().
+TEST_F(FillTest, Invalid_Value_Shape_NEG)
+{
+  Shape dims_shape{3};
+
+  std::vector<int32_t> dims_data = {2, 2, 2};
+  std::vector<float> value_data = {5};
+
+  Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, _memory_manager.get());
+  Tensor value = makeInputTensor<loco::DataType::FLOAT32>({1}, value_data, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+  Fill kernel(&dims, &value, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Floor.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.cpp
new file mode 100644
index 000000000..e3c4246cc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Floor.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/floor.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Element-wise floor kernel; just forwards the tensors to the base Kernel.
+Floor::Floor(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+// Input and output dtypes must match; output takes the input's shape.
+void Floor::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  output()->resize(input()->shape());
+}
+
+// Only FLOAT32 is supported; any other dtype throws.
+void Floor::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Delegates to the TFLite reference floor implementation.
+void Floor::evalFloat() const
+{
+  tflite::reference_ops::Floor(getTensorShape(input()), getTensorData<float>(input()),
+                               getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Floor.h b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.h
new file mode 100644
index 000000000..ca3ad5997
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_FLOOR_H
+#define LUCI_INTERPRETER_KERNELS_FLOOR_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel for the element-wise Floor operation (FLOAT32 only at execute).
+class Floor : public Kernel
+{
+public:
+  Floor(const Tensor *input, Tensor *output);
+
+  // Accessors over the base-class input/output tensor lists.
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_FLOOR_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Floor.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.test.cpp
new file mode 100644
index 000000000..30076fb54
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.test.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Floor.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Test fixture: provides a fresh TestMemoryManager for each test case.
+class FloorTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Floor of mixed positive/negative floats; note -0.9 floors to -1 (towards
+// negative infinity, not truncation). Shape is preserved.
+TEST_F(FloorTest, SimpleFloat)
+{
+  std::initializer_list<int32_t> input_shape{1, 2, 4, 1};
+  std::vector<float> input_data{
+    0.2, 8.6, 2.4, 4.3,  // Row 1
+    3, 7.1, 10.5, -0.9,  // Row 2
+  };
+
+  std::initializer_list<int32_t> ref_output_shape{1, 2, 4, 1};
+  std::vector<float> ref_output_data{
+    0, 8, 2, 4,   // Row 1
+    3, 7, 10, -1, // Row 2
+  };
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Floor kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Mismatched input/output dtypes must be rejected at configure() time.
+TEST_F(FloorTest, Input_Output_Type_NEG)
+{
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  Floor kernel(&input_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.cpp
new file mode 100644
index 000000000..a7a10a336
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FloorDiv.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/binary_function.h>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// FloorDiv computes floor(x / y) element-wise with broadcasting.
+// Parameter names now match the declaration in FloorDiv.h (x, y) and the
+// x()/y() accessors; the previous "input"/"alpha" names were copy-paste
+// leftovers from another kernel.
+FloorDiv::FloorDiv(const Tensor *x, const Tensor *y, Tensor *output)
+  : Kernel({x, y}, {output})
+{
+}
+
+// Both operands must share the output's dtype; the output shape is the
+// broadcast of the two operand shapes.
+void FloorDiv::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == output()->element_type());
+  LUCI_INTERPRETER_CHECK(y()->element_type() == output()->element_type());
+
+  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+// Only FLOAT32 is supported; any other dtype throws.
+void FloorDiv::execute() const
+{
+  switch (x()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// floor(x / y) in double precision per element; rejects any zero in the
+// denominator tensor (full scan) before computing, then dispatches to the
+// broadcast or same-shape TFLite reference helper.
+void FloorDiv::evalFloat() const
+{
+  auto FloorDivFunc = [](float x, float y) -> float {
+    return std::floor(static_cast<double>(x) / static_cast<double>(y));
+  };
+
+  const auto x_data = getTensorData<float>(x());
+  const auto y_data = getTensorData<float>(y());
+
+  // Check the denominator
+  for (int i = 0; i < getTensorShape(y()).FlatSize(); ++i)
+  {
+    LUCI_INTERPRETER_CHECK(y_data[i] != 0);
+  }
+
+  if (x()->shape() != y()->shape())
+  {
+    tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
+      getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      getTensorData<float>(output()), FloorDivFunc);
+  }
+  else
+  {
+    tflite::reference_ops::BinaryFunction<float, float, float>(
+      getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      getTensorData<float>(output()), FloorDivFunc);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.h b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.h
new file mode 100644
index 000000000..e9c47d81a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H
+#define LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel for the element-wise FloorDiv operation: floor(x / y) with
+// broadcasting (FLOAT32 only at execute; throws on zero denominators).
+class FloorDiv : public Kernel
+{
+public:
+  FloorDiv(const Tensor *x, const Tensor *y, Tensor *output);
+
+  // Accessors over the base-class input/output tensor lists.
+  const Tensor *x() const { return _inputs[0]; }
+  const Tensor *y() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp
new file mode 100644
index 000000000..3e1b5f18e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FloorDiv.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Test fixture: provides a fresh TestMemoryManager for each test case.
+class FloorDivTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Same-shape operands; note floor division rounds towards negative
+// infinity (e.g. -1.9 / -1.0 -> 1, not 2 via truncation of 1.9... etc.).
+TEST_F(FloorDivTest, FloatSimple)
+{
+  Shape x_shape{2, 3};
+  std::vector<float> x_data{
+    0.5, 2.4, 3.1,   // Row 1
+    1.9, -1.9, -2.8, // Row 2
+  };
+
+  Shape y_shape = x_shape;
+  std::vector<float> y_data{
+    2.0, 0.5, 3.0,   // Row 1
+    1.0, -1.0, -2.0, // Row 2
+  };
+
+  std::vector<int32_t> ref_output_shape{2, 3};
+  std::vector<float> ref_output_data{
+    0, 4, 1, // Row 1
+    1, 1, 1, // Row 2
+  };
+
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// {1, 3} x broadcast against {3, 3} y produces a {3, 3} output.
+TEST_F(FloorDivTest, FloatBroadcast)
+{
+  Shape x_shape{1, 3};
+  std::vector<float> x_data{
+    0.5, 2.4, -3.1, // Row 1
+  };
+
+  Shape y_shape{3, 3};
+  std::vector<float> y_data{
+    1.0, 1.0, 1.0,   // Row 1
+    2.0, -0.5, -2.0, // Row 2
+    0.3, 0.7, 0.9,   // Row 3
+  };
+
+  std::vector<int32_t> ref_output_shape{3, 3};
+  std::vector<float> ref_output_data{
+    0, 2, -4, // Row 1
+    0, -5, 1, // Row 2
+    1, 3, -4, // Row 3
+  };
+
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// A zero denominator is only detected at execute() time (evalFloat scans y
+// before computing), so configure() succeeds here and execute() throws.
+TEST_F(FloorDivTest, DivByZero_NEG)
+{
+  Shape shape{3};
+  std::vector<float> x_data{1, 0, -1};
+  std::vector<float> y_data{0, 0, 0};
+
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(shape, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(shape, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+
+  EXPECT_ANY_THROW(kernel.execute());
+}
+
+// Output dtype differing from the operands is rejected at configure() time.
+TEST_F(FloorDivTest, Input_Output_Type_Mismatch_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+  FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Mismatched operand dtypes are rejected at configure() time.
+TEST_F(FloorDivTest, Input_Type_Mismatch_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.cpp
new file mode 100644
index 000000000..bd2bb2f35
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.cpp
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FullyConnected.h"
+
+#include "kernels/Utils.h"
+
+#include "PALFullyConnected.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Fully-connected (dense) kernel. The bias tensor is optional (may be nullptr);
+// all validation happens in configure().
+FullyConnected::FullyConnected(const Tensor *input, const Tensor *weights, const Tensor *bias,
+                               Tensor *output, const FullyConnectedParams &params)
+  : KernelWithParams<FullyConnectedParams>({input, weights, bias}, {output}, params)
+{
+}
+
+// Validates dtype combinations and shapes, then resizes the output tensor.
+void FullyConnected::configure()
+{
+  // The weights dtype dictates which input/output/bias dtypes are legal.
+  if (weights()->element_type() == DataType::U8)
+  {
+    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::U8);
+    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::U8);
+    LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::S32);
+  }
+  else if (weights()->element_type() == DataType::FLOAT32)
+  {
+    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32);
+    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+    LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::FLOAT32);
+  }
+  else if (weights()->element_type() == DataType::S8)
+  {
+    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8);
+    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S8);
+    LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::S32);
+  }
+  else
+  {
+    throw std::runtime_error("Unsupported type.");
+  }
+
+  const Shape &input_shape = input()->shape();
+  const Shape &weights_shape = weights()->shape();
+
+  // Weights must be a 2-D [num_units, input_depth] matrix; an optional bias
+  // must provide exactly one element per output unit.
+  LUCI_INTERPRETER_CHECK(weights_shape.num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(bias() == nullptr ||
+                         bias()->shape().num_elements() == weights_shape.dim(0));
+
+  // The flattened input must split evenly into rows of input_depth elements.
+  LUCI_INTERPRETER_CHECK(input_shape.num_elements() % weights_shape.dim(1) == 0);
+  const int32_t batch_size = input_shape.num_elements() / weights_shape.dim(1);
+  const int32_t num_units = weights_shape.dim(0);
+
+  // NOTE A second, identical bias-size check used to live here; it duplicated
+  // the check right after the weight-rank check above and was removed.
+
+  if (params().keep_num_dims == false)
+  {
+    // Collapse all leading dimensions into a single batch dimension.
+    output()->resize({batch_size, num_units});
+  }
+  else
+  {
+    // Keep the input rank; only the innermost dimension becomes num_units.
+    luci_interpreter::Shape output_shape(input_shape.num_dims());
+    for (int i = 0; i < input_shape.num_dims(); ++i)
+      output_shape.dim(i) = input_shape.dim(i);
+    output_shape.dim(input_shape.num_dims() - 1) = num_units;
+    output()->resize(output_shape);
+  }
+}
+
+// Dispatches to the dtype-specific evaluation routine.
+void FullyConnected::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::U8:
+      evalQuantized();
+      break;
+    case DataType::S8:
+      evalQuantizedS8();
+      break;
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Float path: delegates to the TFLite reference implementation.
+void FullyConnected::evalFloat() const
+{
+  float activation_min{};
+  float activation_max{};
+  calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+  tflite::FullyConnectedParams params{};
+  params.float_activation_min = activation_min;
+  params.float_activation_max = activation_max;
+  params.weights_format = tflite::FullyConnectedWeightsFormat::kDefault;
+
+  tflite::reference_ops::FullyConnected(
+    params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(weights()),
+    getTensorData<float>(weights()), getTensorShape(bias()), getTensorData<float>(bias()),
+    getTensorShape(output()), getTensorData<float>(output()));
+}
+
+// Asymmetric uint8 path: derives the requantization multiplier/shift and the
+// zero-point offsets, then delegates to the TFLite reference implementation.
+void FullyConnected::evalQuantized() const
+{
+  double real_multiplier = 0.0;
+  int output_shift;
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+  int32_t output_multiplier;
+  real_multiplier =
+    getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale());
+  quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+  calculateActivationRangeQuantized(params().activation, output(), &output_activation_min,
+                                    &output_activation_max);
+
+  // Offsets are negated zero points, as expected by the reference kernel.
+  int32_t input_offset = -input()->zero_point();
+  int32_t filter_offset = -weights()->zero_point();
+  int32_t output_offset = output()->zero_point();
+
+  tflite::FullyConnectedParams op_params{};
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+  op_params.lhs_cacheable = false;
+  op_params.rhs_cacheable = false;
+  tflite::reference_ops::FullyConnected(
+    op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(weights()),
+    getTensorData<uint8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+    getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+// Symmetric int8 path: same parameter setup as evalQuantized(), but delegates
+// to the platform abstraction layer (PAL) implementation.
+void FullyConnected::evalQuantizedS8() const
+{
+  double real_multiplier = 0.0;
+  int output_shift;
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+  int32_t output_multiplier;
+  real_multiplier =
+    getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale());
+  quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+  calculateActivationRangeQuantized(params().activation, output(), &output_activation_min,
+                                    &output_activation_max);
+
+  int32_t input_offset = -input()->zero_point();
+  int32_t filter_offset = -weights()->zero_point();
+  int32_t output_offset = output()->zero_point();
+
+  tflite::FullyConnectedParams op_params{};
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+  op_params.lhs_cacheable = false;
+  op_params.rhs_cacheable = false;
+  luci_interpreter_pal::FullyConnected<int8_t>(
+    op_params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(weights()),
+    getTensorData<int8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+    getTensorShape(output()), getTensorData<int8_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.h b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.h
new file mode 100644
index 000000000..2a7c068c0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Fully-connected (dense) layer kernel.
+// Inputs: input tensor, 2-D weights, optional bias (nullptr allowed).
+// configure() validates dtypes/shapes and resizes the output; execute()
+// dispatches to a float, uint8, or int8 evaluation path.
+class FullyConnected : public KernelWithParams<FullyConnectedParams>
+{
+public:
+  FullyConnected(const Tensor *input, const Tensor *weights, const Tensor *bias, Tensor *output,
+                 const FullyConnectedParams &params);
+
+  // Accessors for the tensors registered in the constructor, in order.
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *weights() const { return _inputs[1]; }
+  const Tensor *bias() const { return _inputs[2]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // One evaluation routine per supported element type.
+  void evalFloat() const;
+  void evalQuantized() const;
+  void evalQuantizedS8() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp
new file mode 100644
index 000000000..4474cc4fb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FullyConnected.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Float path: runs FullyConnected with RELU activation and checks the output
+// data and shape exactly.
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
+           std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
+           std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+  EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
+}
+
+// int8 specialization: quantizes the float fixtures (S8 input/weights, S32
+// bias) and compares the dequantized output within the quantization tolerance.
+template <>
+void Check<int8_t>(std::initializer_list<int32_t> input_shape,
+                   std::initializer_list<int32_t> weights_shape,
+                   std::initializer_list<int32_t> bias_shape,
+                   std::initializer_list<int32_t> output_shape,
+                   std::initializer_list<float> input_data,
+                   std::initializer_list<float> weights_data,
+                   std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  const float quantized_tolerance = getTolerance(-127, 128, 255);
+  std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-63.5, 64);
+  std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
+                                  input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::S8>(weights_shape, input_quant_param.first, input_quant_param.second,
+                                  weights_data, memory_manager.get());
+  // Bias scale is input_scale * weights_scale (same scale used for both here).
+  Tensor bias_tensor =
+    makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0,
+                                   bias_data, memory_manager.get());
+  Tensor output_tensor =
+    makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(output_data, quantized_tolerance));
+}
+
+// uint8 specialization: same scheme as int8 but with U8 input/weights.
+template <>
+void Check<uint8_t>(
+  std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
+  std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
+  std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
+  std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  const float quantized_tolerance = getTolerance(-127, 128, 255);
+  std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
+  std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+                                  input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::U8>(weights_shape, input_quant_param.first, input_quant_param.second,
+                                  weights_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0,
+                                   bias_data, memory_manager.get());
+  Tensor output_tensor =
+    makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(output_data, quantized_tolerance));
+}
+
+// Typed fixture: runs the Simple test once per supported element type.
+template <typename T> class FullyConnectedTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
+TYPED_TEST_SUITE(FullyConnectedTest, DataTypes);
+
+// Happy path: 2 batches x 6 inputs against a [3, 6] weight matrix.
+TYPED_TEST(FullyConnectedTest, Simple)
+{
+  Check<TypeParam>({3, 2, 2, 1}, {3, 6}, {3}, {2, 3},
+                   {
+                     -3, -5, 5, 4, 9, -2,  // batch = 0
+                     -3, -2, -4, 9, -8, 1, // batch = 1
+                   },
+                   {
+                     -3, -7, 4, -4, -6, 4, // unit = 0
+                     3, 5, 2, 3, -3, -8,   // unit = 1
+                     -3, 7, 4, 9, 0, -5,   // unit = 2
+                   },
+                   {-1, -5, -8},
+                   {
+                     0, 0, 32,   // batch = 0
+                     22, 11, 47, // batch = 1
+                   });
+}
+
+// A float graph with an S32 bias must be rejected by configure().
+TEST(FullyConnectedTest, InvalidBiasType_NEG)
+{
+  Shape input_shape{3, 2, 2, 1};
+  std::vector<float> input_data{
+    -3, -5, 5, 4, 9, -2,  // batch = 0
+    -3, -2, -4, 9, -8, 1, // batch = 1
+  };
+  Shape weights_shape{3, 6};
+  std::vector<float> weights_data{
+    -3, -7, 4, -4, -6, 4, // unit = 0
+    3, 5, 2, 3, -3, -8,   // unit = 1
+    -3, 7, 4, 9, 0, -5,   // unit = 2
+  };
+  Shape bias_shape{3};
+  std::vector<int32_t> bias_data{-1, -5, -8};
+
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Weights must be rank-2; a rank-3 weight tensor must be rejected.
+TEST(FullyConnectedTest, InvalidWeightShapeDim_NEG)
+{
+  Shape input_shape{3, 2, 2, 1};
+  std::vector<float> input_data{
+    -3, -5, 5, 4, 9, -2,  // batch = 0
+    -3, -2, -4, 9, -8, 1, // batch = 1
+  };
+  Shape weights_shape{1, 3, 6};
+  std::vector<float> weights_data{
+    -3, -7, 4, -4, -6, 4, // unit = 0
+    3, 5, 2, 3, -3, -8,   // unit = 1
+    -3, 7, 4, 9, 0, -5,   // unit = 2
+  };
+  Shape bias_shape{3};
+  std::vector<float> bias_data{-1, -5, -8};
+
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Bias length (3) must match weights dim(0) (6 here) — must be rejected.
+TEST(FullyConnectedTest, BiasElementNumWeightDimMismatch_NEG)
+{
+  Shape input_shape{3, 2, 2, 1};
+  std::vector<float> input_data{
+    -3, -5, 5, 4, 9, -2,  // batch = 0
+    -3, -2, -4, 9, -8, 1, // batch = 1
+  };
+  Shape weights_shape{6, 3};
+  std::vector<float> weights_data{
+    -3, -7, 4, // unit = 0
+    -4, -6, 4, // unit = 1
+    3, 5, 2,   // unit = 2
+    3, -3, -8, // unit = 3
+    -3, 7, 4,  // unit = 4
+    9, 0, -5,  // unit = 5
+  };
+  Shape bias_shape{3};
+  std::vector<float> bias_data{-1, -5, -8};
+
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Gather.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.cpp
new file mode 100644
index 000000000..f1256660f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Gather.h"
+#include "kernels/Utils.h"
+#include "PALGather.h"
+
+#include <stdexcept>
+#include <cassert>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Gather kernel: selects slices of `params` along `axis` using `indices`,
+// with optional leading batch dimensions (gparams.batch_dims).
+Gather::Gather(const Tensor *params, const Tensor *indices, Tensor *output,
+               const GatherParams &gparams)
+  : KernelWithParams<GatherParams>({params, indices}, {output}, gparams)
+{
+}
+
+// Validates dtypes, normalizes axis/batch_dims, and computes the output shape.
+void Gather::configure()
+{
+  // Only the FLOAT32 params path is implemented (see execute()).
+  if (params()->element_type() == DataType::FLOAT32)
+  {
+    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+  }
+  else
+  {
+    throw std::runtime_error("Unsupported type.");
+  }
+
+  LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32 ||
+                         indices()->element_type() == DataType::S64);
+
+  // refer tensorflow/lite/kernels/gather.cc
+
+  const Shape &params_shape = params()->shape();
+  const Shape &indices_shape = indices()->shape();
+
+  // Negative axis counts from the end, as in TFLite.
+  int axis = _params.axis;
+  if (axis < 0)
+  {
+    axis += params_shape.num_dims();
+  }
+  LUCI_INTERPRETER_CHECK(0 <= axis && axis < params_shape.num_dims());
+
+  int batch_dims = _params.batch_dims;
+  // batch_dims should be in range: [-rank(indices), rank(indices)].
+  // Negative batch_dims is added with rank of positions.
+  if (batch_dims < 0)
+  {
+    batch_dims += indices_shape.num_dims();
+  }
+  LUCI_INTERPRETER_CHECK(batch_dims <= axis);
+  LUCI_INTERPRETER_CHECK(0 <= batch_dims && batch_dims < params_shape.num_dims());
+  LUCI_INTERPRETER_CHECK(batch_dims <= indices_shape.num_dims());
+  // Leading batch dimensions of params and indices must agree.
+  for (int i = 0; i < batch_dims; ++i)
+  {
+    LUCI_INTERPRETER_CHECK(params_shape.dim(i) == indices_shape.dim(i));
+  }
+
+  // Output shape: params dims before axis, then non-batch indices dims, then
+  // params dims after axis.
+  const int num_dimensions = params_shape.num_dims() + indices_shape.num_dims() - 1 - batch_dims;
+
+  Shape output_shape(num_dimensions);
+  int output_index = 0;
+  for (int i = 0; i < axis; ++i)
+  {
+    output_shape.dim(output_index++) = params_shape.dim(i);
+  }
+  for (int i = batch_dims; i < indices_shape.num_dims(); ++i)
+  {
+    output_shape.dim(output_index++) = indices_shape.dim(i);
+  }
+  for (int i = axis + 1; i < params_shape.num_dims(); ++i)
+  {
+    output_shape.dim(output_index++) = params_shape.dim(i);
+  }
+  output()->resize(output_shape);
+}
+
+// Dispatches on the params dtype; only FLOAT32 is supported.
+void Gather::execute() const
+{
+  switch (params()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Float path: picks the S32 or S64 index instantiation of the PAL Gather.
+void Gather::evalFloat() const
+{
+  assert(indices()->element_type() == DataType::S32 || indices()->element_type() == DataType::S64);
+
+  const auto params_data = getTensorData<float>(params());
+  auto output_data = getTensorData<float>(output());
+
+  tflite::GatherParams tparams;
+  tparams.axis = _params.axis;
+  tparams.batch_dims = _params.batch_dims;
+
+  if (indices()->element_type() == DataType::S32)
+  {
+    const auto indices_data = getTensorData<int32_t>(indices());
+
+    luci_interpreter_pal::Gather<float, int32_t>(tparams, getTensorShape(params()), params_data,
+                                                 getTensorShape(indices()), indices_data,
+                                                 getTensorShape(output()), output_data);
+  }
+  else
+  {
+    const auto indices_data = getTensorData<int64_t>(indices());
+
+    luci_interpreter_pal::Gather<float, int64_t>(tparams, getTensorShape(params()), params_data,
+                                                 getTensorShape(indices()), indices_data,
+                                                 getTensorShape(output()), output_data);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Gather.h b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.h
new file mode 100644
index 000000000..cc02d64fb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_GATHER_H
+#define LUCI_INTERPRETER_KERNELS_GATHER_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Gather kernel: selects slices of `params` along GatherParams::axis using
+// `indices` (S32 or S64), honoring GatherParams::batch_dims.
+class Gather : public KernelWithParams<GatherParams>
+{
+public:
+  Gather(const Tensor *params, const Tensor *indices, Tensor *output, const GatherParams &gparams);
+
+  // Accessors for the tensors registered in the constructor, in order.
+  const Tensor *params() const { return _inputs[0]; }
+  const Tensor *indices() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // FLOAT32 is the only supported params dtype (see Gather.cpp).
+  void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_GATHER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Gather.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.test.cpp
new file mode 100644
index 000000000..4b3dda708
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.test.cpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Gather.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Fixture providing a fresh TestMemoryManager per test.
+class GatherTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Gather along axis 1 of a [1, 6] tensor with flat S32 indices.
+TEST_F(GatherTest, Simple)
+{
+  std::vector<float> params_data{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
+  std::vector<int32_t> indices_data{1, 0, 1, 5};
+  std::vector<float> ref_output_data{2.f, 1.f, 2.f, 6.f};
+
+  Tensor params_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 6}, params_data, _memory_manager.get());
+  Tensor indices_tensor = makeInputTensor<DataType::S32>({4}, indices_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  GatherParams gparams;
+
+  gparams.axis = 1;
+  gparams.batch_dims = 0;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4}));
+}
+
+// Batched gather: batch_dims = 1, per-row indices into a [3, 5] tensor.
+TEST_F(GatherTest, Simple_Batch)
+{
+  Shape params_shape = {3, 5};
+  Shape indices_shape = {3, 2};
+  std::vector<float> params_data{0., 0., 1., 0., 2., 3., 0., 0., 0., 4., 0., 5., 0., 6., 0.};
+  std::vector<int32_t> indices_data{2, 4, 0, 4, 1, 3};
+  std::vector<float> ref_output_data{1., 2., 3., 4., 5., 6.};
+
+  Tensor params_tensor =
+    makeInputTensor<DataType::FLOAT32>(params_shape, params_data, _memory_manager.get());
+  Tensor indices_tensor =
+    makeInputTensor<DataType::S32>(indices_shape, indices_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  GatherParams gparams;
+
+  gparams.axis = 1;
+  gparams.batch_dims = 1;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 2}));
+}
+
+// Non-FLOAT32 params must be rejected by configure().
+TEST_F(GatherTest, Simple_NEG)
+{
+  Tensor params_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get());
+  Tensor indices_tensor = makeInputTensor<DataType::S32>({1}, {0}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  GatherParams gparams;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// An axis outside the params rank must be rejected by configure().
+TEST_F(GatherTest, Axis_NEG)
+{
+  Tensor params_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor indices_tensor = makeInputTensor<DataType::S32>({1}, {0}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  GatherParams gparams;
+
+  gparams.axis = 100;
+  gparams.batch_dims = 0;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// batch_dims greater than axis must be rejected by configure().
+TEST_F(GatherTest, Batch_NEG)
+{
+  std::vector<float> params_data{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
+  std::vector<int32_t> indices_data{1, 0, 1, 5};
+  std::vector<float> ref_output_data{2.f, 1.f, 2.f, 6.f};
+
+  Tensor params_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 6}, params_data, _memory_manager.get());
+  Tensor indices_tensor = makeInputTensor<DataType::S32>({4}, indices_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  GatherParams gparams;
+
+  gparams.axis = 0;
+  gparams.batch_dims = 1;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Greater.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.cpp
new file mode 100644
index 000000000..5ccae3c38
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Greater.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Binary elementwise Greater (x > y) producing a BOOL tensor.
+Greater::Greater(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+// Validates operand/output dtypes, precomputes rescale multipliers for the
+// quantized (U8) path, and resizes the output to the broadcasted shape.
+void Greater::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+  if (x()->element_type() == DataType::U8)
+  {
+    // Comparison of U8 operands is done on rescaled values; cache the
+    // fixed-point multiplier/shift for each input's scale here.
+    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+  }
+  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+// Dispatches to the evaluation routine matching the input element type.
+// configure() has already guaranteed that x and y share one element type.
+void Greater::execute() const
+{
+  switch (x()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Float path: delegates to the TFLite reference comparison kernels, picking
+// the broadcasting variant when the operand shapes differ.
+void Greater::evalFloat() const
+{
+  const auto x_data = getTensorData<float>(x());
+  const auto y_data = getTensorData<float>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreater(op_params, getTensorShape(x()), x_data,
+                                                  getTensorShape(y()), y_data,
+                                                  getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::Greater(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+                                   y_data, getTensorShape(output()), output_data);
+  }
+}
+
+// Integer path (S32/S64): uses the "NoScaling" reference kernels since
+// integer comparison needs no quantization rescale.
+template <typename T> void Greater::evalInteger() const
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreaterNoScaling(op_params, getTensorShape(x()), x_data,
+                                                           getTensorShape(y()), y_data,
+                                                           getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::GreaterNoScaling(op_params, getTensorShape(x()), x_data,
+                                            getTensorShape(y()), y_data, getTensorShape(output()),
+                                            output_data);
+  }
+}
+
+// Quantized U8 path: compares operands after rescaling each with the
+// fixed-point multiplier/shift cached in configure(), per the contract of
+// the tflite::reference_ops "WithScaling" comparison kernels.
+void Greater::evalQuantized() const
+{
+  const auto x_data = getTensorData<uint8_t>(x());
+  const auto y_data = getTensorData<uint8_t>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.left_shift = 8;
+  op_params.input1_offset = -x()->zero_point(); // Note the '-'
+  op_params.input1_shift = _x_shift;
+  op_params.input1_multiplier = _x_multiplier;
+  op_params.input2_offset = -y()->zero_point(); // Note the '-'
+  op_params.input2_shift = _y_shift;
+  op_params.input2_multiplier = _y_multiplier;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreaterWithScaling(op_params, getTensorShape(x()), x_data,
+                                                             getTensorShape(y()), y_data,
+                                                             getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::GreaterWithScaling(op_params, getTensorShape(x()), x_data,
+                                              getTensorShape(y()), y_data, getTensorShape(output()),
+                                              output_data);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Greater.h b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.h
new file mode 100644
index 000000000..065f76d7b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_GREATER_H
+#define LUCI_INTERPRETER_KERNELS_GREATER_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel implementing elementwise Greater (x > y) with numpy-style
+// broadcasting. Supports FLOAT32, S32, S64 and quantized U8 inputs; the
+// output is always BOOL.
+class Greater : public Kernel
+{
+public:
+  Greater(const Tensor *x, const Tensor *y, Tensor *output);
+
+  const Tensor *x() const { return _inputs[0]; }
+  const Tensor *y() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+  template <typename T> void evalInteger() const;
+  void evalQuantized() const;
+
+private:
+  // Fixed-point rescale parameters for the quantized (U8) path, computed
+  // once in configure() from each input's scale.
+  int32_t _x_multiplier = 0;
+  int _x_shift = 0;
+  int32_t _y_multiplier = 0;
+  int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_GREATER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Greater.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.test.cpp
new file mode 100644
index 000000000..a48080124
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.test.cpp
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Greater.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Fixture: provides a fresh TestMemoryManager per test case.
+class GreaterTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Elementwise float comparison with identical operand shapes (no broadcast).
+TEST_F(GreaterTest, FloatSimple)
+{
+  std::vector<float> x_data{
+    0.5, 0.7, 0.9, // Row 1
+    1, 0, -1,      // Row 2
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.7, 0.5, // Row 1
+    -1, 0, 1,      // Row 2
+  };
+
+  std::vector<bool> ref_output_data{
+    false, false, true, // Row 1
+    true, false, false, // Row 2
+  };
+
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+// Float comparison where the y operand ({1, 3}) is broadcast across the
+// rows of x ({3, 3}).
+// NOTE: test name fixed from the misspelled "FloatBroardcast".
+TEST_F(GreaterTest, FloatBroadcast)
+{
+  std::vector<float> x_data{
+    0.5, 0.7, 0.9, // Row 1
+    1, 0, -1,      // Row 2
+    -1, 0, 1,      // Row 3
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.7, 0.5, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    false, false, true, // Row 1
+    true, false, false, // Row 2
+    false, false, true, // Row 3
+  };
+
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
+}
+
+// Shared driver: same-shape integer comparison exercising the min/max
+// limits of the element type (guards against accidental value truncation).
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{min_value, 2, max_value};
+
+  std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+  std::vector<bool> ref_output_data{false, true, false};
+
+  Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+// Shared driver: integer comparison with y ({3}) broadcast over x ({4, 3}).
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{
+    min_value, 2, 3,          // Row 1
+    4, 5, max_value,          // Row 2
+    -1, -4, -3,               // Row 3
+    min_value, -2, max_value, // Row 4
+  };
+
+  std::vector<dtype> y_data{
+    min_value + 1, -2, max_value - 1, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    false, true, false, // Row 1
+    true, true, true,   // Row 2
+    true, false, false, // Row 3
+    false, false, true, // Row 4
+  };
+
+  Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+// Instantiations of the shared drivers for each supported integer type.
+TEST_F(GreaterTest, Int32)
+{
+  checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+  SUCCEED();
+}
+
+TEST_F(GreaterTest, Int64)
+{
+  checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+  SUCCEED();
+}
+
+// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+// U8 comparison where both operands share the same quantization parameters.
+TEST_F(GreaterTest, Uint8Quantized)
+{
+  std::vector<float> x_data{
+    0.5, 0.6, 0.7, 0.9, // Row 1
+    1, 0, 0.05, -1,     // Row 2
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.6, 0.6, 0.5, // Row 1
+    -1, 0.05, 0, 1,     // Row 2
+  };
+
+  std::vector<bool> ref_output_data{
+    false, false, true, true, // Row 1
+    true, false, true, false, // Row 2
+  };
+
+  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+// U8 comparison where x and y use different quantization parameters, so the
+// kernel must rescale both operands before comparing.
+TEST_F(GreaterTest, Uint8QuantizedRescale)
+{
+  std::vector<float> x_data{
+    0.5, 0.6, 0.7, 0.9, // Row 1
+    1, 0, 0.05, -1,     // Row 2
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.6, 0.6, 0.5, // Row 1
+    -1, 0.05, 0, 1,     // Row 2
+  };
+
+  std::vector<bool> ref_output_data{
+    false, false, true, true, // Row 1
+    true, false, true, false, // Row 2
+  };
+
+  std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+  std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 3);
+
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+// U8 comparison with y ({1, 1, 4, 1}) broadcast over x ({1, 3, 4, 1}).
+TEST_F(GreaterTest, Uint8QuantizedBroadcast)
+{
+  std::vector<float> x_data{
+    0.4, -0.8, 0.7, 0.3, // Row 1
+    -0.5, 0.1, 0, 0.5,   // Row 2
+    1, 0, 0.05, -1,      // Row 3
+  };
+
+  std::vector<float> y_data{
+    -1, 0.05, 0, 1, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    true, false, true, false, // Row 1
+    true, true, false, false, // Row 2
+    true, false, true, false, // Row 3
+  };
+
+  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+// configure() must reject operands of different element types.
+TEST_F(GreaterTest, Input_Type_Mismatch_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// configure() must reject a non-BOOL output tensor.
+TEST_F(GreaterTest, Input_Output_Type_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// {2} vs {3} is not broadcastable; configure() must throw (float path).
+TEST_F(GreaterTest, Float_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Same non-broadcastable shapes on the S32 path.
+TEST_F(GreaterTest, Int32_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Same non-broadcastable shapes on the S64 path.
+TEST_F(GreaterTest, Int64_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.cpp
new file mode 100644
index 000000000..27e42c971
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/GreaterEqual.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Binary elementwise GreaterEqual (x >= y) producing a BOOL tensor.
+GreaterEqual::GreaterEqual(const Tensor *x, const Tensor *y, Tensor *output)
+  : Kernel({x, y}, {output})
+{
+}
+
+// Validates operand/output dtypes, precomputes rescale multipliers for the
+// quantized (U8) path, and resizes the output to the broadcasted shape.
+void GreaterEqual::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+  if (x()->element_type() == DataType::U8)
+  {
+    // Cache fixed-point multiplier/shift derived from each input's scale
+    // for the quantized comparison path.
+    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+  }
+  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+// Dispatches to the evaluation routine matching the input element type.
+void GreaterEqual::execute() const
+{
+  switch (x()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Float path: delegates to the TFLite reference comparison kernels, picking
+// the broadcasting variant when the operand shapes differ.
+void GreaterEqual::evalFloat() const
+{
+  const auto x_data = getTensorData<float>(x());
+  const auto y_data = getTensorData<float>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreaterEqual(op_params, getTensorShape(x()), x_data,
+                                                       getTensorShape(y()), y_data,
+                                                       getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::GreaterEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+                                        y_data, getTensorShape(output()), output_data);
+  }
+}
+
+// Integer path (S32/S64): uses the "NoScaling" reference kernels since
+// integer comparison needs no quantization rescale.
+template <typename T> void GreaterEqual::evalInteger() const
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
+      op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      output_data);
+  }
+  else
+  {
+    tflite::reference_ops::GreaterEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                                 getTensorShape(y()), y_data,
+                                                 getTensorShape(output()), output_data);
+  }
+}
+
+// Quantized U8 path: compares operands after rescaling each with the
+// fixed-point multiplier/shift cached in configure(), per the contract of
+// the tflite::reference_ops "WithScaling" comparison kernels.
+void GreaterEqual::evalQuantized() const
+{
+  const auto x_data = getTensorData<uint8_t>(x());
+  const auto y_data = getTensorData<uint8_t>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.left_shift = 8;
+  op_params.input1_offset = -x()->zero_point(); // Note the '-'
+  op_params.input1_shift = _x_shift;
+  op_params.input1_multiplier = _x_multiplier;
+  op_params.input2_offset = -y()->zero_point(); // Note the '-'
+  op_params.input2_shift = _y_shift;
+  op_params.input2_multiplier = _y_multiplier;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
+      op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      output_data);
+  }
+  else
+  {
+    tflite::reference_ops::GreaterEqualWithScaling(op_params, getTensorShape(x()), x_data,
+                                                   getTensorShape(y()), y_data,
+                                                   getTensorShape(output()), output_data);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.h b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.h
new file mode 100644
index 000000000..e333c30a6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H
+#define LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel implementing elementwise GreaterEqual (x >= y) with numpy-style
+// broadcasting. Supports FLOAT32, S32, S64 and quantized U8 inputs; the
+// output is always BOOL.
+class GreaterEqual : public Kernel
+{
+public:
+  GreaterEqual(const Tensor *x, const Tensor *y, Tensor *output);
+
+  const Tensor *x() const { return _inputs[0]; }
+  const Tensor *y() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+  template <typename T> void evalInteger() const;
+  void evalQuantized() const;
+
+private:
+  // Fixed-point rescale parameters for the quantized (U8) path, computed
+  // once in configure() from each input's scale.
+  int32_t _x_multiplier = 0;
+  int _x_shift = 0;
+  int32_t _y_multiplier = 0;
+  int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp
new file mode 100644
index 000000000..35bf88eab
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp
@@ -0,0 +1,333 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/GreaterEqual.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Fixture: provides a fresh TestMemoryManager per test case.
+class GreaterEqualTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Elementwise float comparison with identical operand shapes (no broadcast).
+TEST_F(GreaterEqualTest, FloatSimple)
+{
+  std::vector<float> x_data{
+    0.5, 0.7, 0.9, // Row 1
+    1, 0, -1,      // Row 2
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.7, 0.5, // Row 1
+    -1, 0, 1,      // Row 2
+  };
+
+  std::vector<bool> ref_output_data{
+    false, true, true,  // Row 1
+    true, true, false,  // Row 2
+  };
+
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+// Float comparison where the y operand ({1, 3}) is broadcast across the
+// rows of x ({3, 3}).
+// NOTE: test name fixed from the misspelled "FloatBroardcast".
+TEST_F(GreaterEqualTest, FloatBroadcast)
+{
+  std::vector<float> x_data{
+    0.5, 0.7, 0.9, // Row 1
+    1, 0, -1,      // Row 2
+    -1, 0, 1,      // Row 3
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.7, 0.5, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    false, true, true,   // Row 1
+    true, false, false,  // Row 2
+    false, false, true,  // Row 3
+  };
+
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
+}
+// Shared driver: same-shape integer comparison exercising the min/max
+// limits of the element type (guards against accidental value truncation).
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{min_value, 2, max_value};
+
+  std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+  std::vector<bool> ref_output_data{false, true, true};
+
+  Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+// Shared driver: integer comparison with y ({3}) broadcast over x ({4, 3}).
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{
+    min_value, 2, 3,              // Row 1
+    4, 5, max_value,              // Row 2
+    -1, -4, -3,                   // Row 3
+    min_value, -2, max_value - 1, // Row 4
+  };
+
+  std::vector<dtype> y_data{
+    min_value + 1, -2, max_value - 1, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    false, true, false, // Row 1
+    true, true, true,   // Row 2
+    true, false, false, // Row 3
+    false, true, true,  // Row 4
+  };
+
+  Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+// Instantiations of the shared drivers for each supported integer type.
+TEST_F(GreaterEqualTest, Int32)
+{
+  checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+  SUCCEED();
+}
+
+TEST_F(GreaterEqualTest, Int64)
+{
+  checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+  SUCCEED();
+}
+
+// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+// U8 comparison where both operands share the same quantization parameters.
+TEST_F(GreaterEqualTest, Uint8Quantized)
+{
+  std::vector<float> x_data{
+    0.5, 0.6, 0.7, 0.9, // Row 1
+    1, 0, 0.05, -1,     // Row 2
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.6, 0.55, 0.5, // Row 1
+    -1, 0.05, 0, 1,      // Row 2
+  };
+
+  std::vector<bool> ref_output_data{
+    false, true, true, true,  // Row 1
+    true, false, true, false, // Row 2
+  };
+
+  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+// U8 comparison where x and y use different quantization parameters, so the
+// kernel must rescale both operands before comparing.
+TEST_F(GreaterEqualTest, Uint8QuantizedRescale)
+{
+  std::vector<float> x_data{
+    0.5, 0.5, 0.7, 0.9, // Row 1
+    1, 0, 0.05, -1,     // Row 2
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.5, 0.6, 0.5, // Row 1
+    -1, 0.05, 0, 1,     // Row 2
+  };
+
+  std::vector<bool> ref_output_data{
+    false, true, true, true,  // Row 1
+    true, false, true, false, // Row 2
+  };
+
+  std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+  std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
+
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+// U8 comparison with y ({1, 1, 4, 1}) broadcast over x ({1, 3, 4, 1}).
+TEST_F(GreaterEqualTest, Uint8QuantizedBroadcast)
+{
+  std::vector<float> x_data{
+    0.4, -0.8, 0.7, 0.3, // Row 1
+    -0.5, 0.1, 0, 0.5,   // Row 2
+    1, 0, 0.05, -1,      // Row 3
+  };
+
+  std::vector<float> y_data{
+    -1, 0.05, 0, 1, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    true, false, true, false, // Row 1
+    true, true, true, false,  // Row 2
+    true, false, true, false, // Row 3
+  };
+
+  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+// configure() must reject operands of different element types.
+TEST_F(GreaterEqualTest, Input_Type_Mismatch_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// configure() must reject a non-BOOL output tensor.
+TEST_F(GreaterEqualTest, Input_Output_Type_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// {2} vs {3} is not broadcastable; configure() must throw (float path).
+TEST_F(GreaterEqualTest, Float_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Same non-broadcastable shapes on the S32 path.
+TEST_F(GreaterEqualTest, Int32_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Same non-broadcastable shapes on the S64 path.
+TEST_F(GreaterEqualTest, Int64_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/If.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/If.cpp
new file mode 100644
index 000000000..971708bca
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/If.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/If.h"
+#include "kernels/Utils.h"
+
+#include <cstring>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+static std::vector<const Tensor *> joinInputs(const Tensor *cond,
+ const std::vector<const Tensor *> &inputs)
+{
+ std::vector<const Tensor *> result{cond};
+ result.insert(result.cend(), inputs.cbegin(), inputs.cend());
+ return result;
+}
+
+If::If(const Tensor *cond, const std::vector<const Tensor *> &inputs, std::vector<Tensor *> outputs,
+ RuntimeGraph *then_graph, RuntimeGraph *else_graph)
+ : Kernel(joinInputs(cond, inputs), std::move(outputs)), _then_graph(then_graph),
+ _else_graph(else_graph)
+{
+}
+
+void If::configure()
+{
+ LUCI_INTERPRETER_CHECK(cond()->element_type() == DataType::BOOL);
+ LUCI_INTERPRETER_CHECK(cond()->shape().num_elements() == 1);
+
+ for (RuntimeGraph *graph : {_then_graph, _else_graph})
+ {
+ (void)graph;
+ LUCI_INTERPRETER_CHECK(graph->getInputTensors().size() == getInputTensors().size() - 1);
+ LUCI_INTERPRETER_CHECK(graph->getOutputTensors().size() == getOutputTensors().size());
+ }
+}
+
+void If::execute() const
+{
+ const bool cond_value = cond()->data<bool>()[0];
+
+ RuntimeGraph *active_graph = cond_value ? _then_graph : _else_graph;
+ const auto &graph_inputs = active_graph->getInputTensors();
+ const auto &graph_outputs = active_graph->getOutputTensors();
+
+ // Copy kernel inputs to active graph inputs.
+ for (size_t i = 0; i < getInputTensors().size() - 1; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(graph_inputs[i]->element_type() == input(i)->element_type());
+ graph_inputs[i]->resize(input(i)->shape());
+
+ const int32_t num_elements = input(i)->shape().num_elements();
+ const std::size_t element_size = getDataTypeSize(input(i)->element_type());
+ // TODO: Think about how to allocate memory for the input in the main graph
+ active_graph->configureAllocations(graph_inputs[i]);
+ std::memcpy(graph_inputs[i]->data<void>(), input(i)->data<void>(), num_elements * element_size);
+ }
+
+ active_graph->execute();
+
+ // Copy graph outputs to kernel outputs.
+ for (size_t i = 0; i < getOutputTensors().size(); ++i)
+ {
+ LUCI_INTERPRETER_CHECK(graph_outputs[i]->element_type() == output(i)->element_type());
+ output(i)->resize(graph_outputs[i]->shape());
+ // TODO: Think about how to allocate memory for the output in the main graph
+ active_graph->configureAllocations(output(i));
+
+ const int32_t num_elements = output(i)->shape().num_elements();
+ const std::size_t element_size = getDataTypeSize(output(i)->element_type());
+ std::memcpy(output(i)->data<void>(), graph_outputs[i]->data<void>(),
+ num_elements * element_size);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/If.h b/compiler/luci-micro/luci-interpreter/src/kernels/If.h
new file mode 100644
index 000000000..fa6ab371a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/If.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_IF_H
+#define LUCI_INTERPRETER_KERNELS_IF_H
+
+#include "core/Kernel.h"
+#include "core/RuntimeGraph.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class If : public Kernel
+{
+public:
+ If(const Tensor *cond, const std::vector<const Tensor *> &inputs, std::vector<Tensor *> outputs,
+ RuntimeGraph *then_graph, RuntimeGraph *else_graph);
+
+ const Tensor *cond() const { return _inputs[0]; }
+ const Tensor *input(int index) const { return _inputs[1 + index]; }
+ Tensor *output(int index) const { return _outputs[index]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ RuntimeGraph *const _then_graph;
+ RuntimeGraph *const _else_graph;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_IF_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/If.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/If.test.cpp
new file mode 100644
index 000000000..c5f4faf75
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/If.test.cpp
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "core/RuntimeModule.h"
+#include "kernels/Add.h"
+#include "kernels/If.h"
+#include "kernels/Mul.h"
+#include "kernels/TestUtils.h"
+
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class IfTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+RuntimeGraph *buildAddSubgraph(RuntimeModule *module, IMemoryManager *memory_manager)
+{
+ RuntimeGraph *graph = module->addGraph(memory_manager);
+ Tensor *input1 = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ Tensor *input2 = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ Tensor *output = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+
+ memory_manager->allocate_memory(*input1);
+ memory_manager->allocate_memory(*input2);
+ memory_manager->allocate_memory(*output);
+
+ graph->setInputTensors({input1, input2});
+ graph->setOutputTensors({output});
+
+ AddParams params{};
+ params.activation = Activation::NONE;
+ graph->addKernel(std::make_unique<Add>(input1, input2, output, params));
+
+ return graph;
+}
+
+RuntimeGraph *buildMulSubgraph(RuntimeModule *module, IMemoryManager *memory_manager)
+{
+ RuntimeGraph *graph = module->addGraph(memory_manager);
+ Tensor *input1 = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ Tensor *input2 = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ Tensor *output = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+
+ memory_manager->allocate_memory(*input1);
+ memory_manager->allocate_memory(*input2);
+ memory_manager->allocate_memory(*output);
+
+ graph->setInputTensors({input1, input2});
+ graph->setOutputTensors({output});
+
+ MulParams params{};
+ params.activation = Activation::NONE;
+ graph->addKernel(std::make_unique<Mul>(input1, input2, output, params));
+
+ return graph;
+}
+
+TEST_F(IfTest, CondTrue)
+{
+ Tensor cond = makeInputTensor<DataType::BOOL>({1}, {true}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
+ Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+ RuntimeModule module(nullptr);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
+
+ If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
+ kernel.configure();
+ _memory_manager->allocate_memory(output);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({6, 9}));
+}
+
+TEST_F(IfTest, CondFalse)
+{
+ Tensor cond = makeInputTensor<DataType::BOOL>({1}, {false}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
+ Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+ RuntimeModule module(nullptr);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
+
+ If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
+ kernel.configure();
+ _memory_manager->allocate_memory(output);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({5, 14}));
+}
+
+TEST_F(IfTest, InvalidCondType_NEG)
+{
+ Tensor cond = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
+ Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+ RuntimeModule module(nullptr);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
+
+ If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(IfTest, InvalidCondElementNum_NEG)
+{
+ Tensor cond = makeInputTensor<DataType::BOOL>({2}, {false, true}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
+ Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+ RuntimeModule module(nullptr);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
+
+ If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.cpp
new file mode 100644
index 000000000..22a329be6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/InstanceNorm.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/common.h>
+#include <cmath>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+InstanceNorm::InstanceNorm(const Tensor *input, const Tensor *gamma, const Tensor *beta,
+ Tensor *output, const InstanceNormParams &params)
+ : KernelWithParams<InstanceNormParams>({input, gamma, beta}, {output}, params)
+{
+}
+
+void InstanceNorm::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(gamma()->element_type() == input()->element_type());
+ LUCI_INTERPRETER_CHECK(gamma()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(gamma()->shape().dim(0) == input()->shape().dim(3) ||
+ gamma()->shape().dim(0) == 1);
+ LUCI_INTERPRETER_CHECK(beta()->element_type() == input()->element_type());
+ LUCI_INTERPRETER_CHECK(beta()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(beta()->shape().dim(0) == input()->shape().dim(3) ||
+ beta()->shape().dim(0) == 1);
+ output()->resize(input()->shape());
+}
+
+void InstanceNorm::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void InstanceNorm::evalFloat() const
+{
+ float activation_min, activation_max;
+ calculateActivationRange(params().activation, &activation_min, &activation_max);
+ auto input_shape = getTensorShape(input());
+ auto output_shape = getTensorShape(output());
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t heights = tflite::MatchingDim(input_shape, 1, output_shape, 1);
+ const int32_t widths = tflite::MatchingDim(input_shape, 2, output_shape, 2);
+ const int32_t channels = tflite::MatchingDim(input_shape, 3, output_shape, 3);
+ const float *input_data = getTensorData<float>(input());
+ const float *gamma_data = getTensorData<float>(gamma());
+ auto gamma_shape = getTensorShape(gamma());
+ bool single_gamma = gamma_shape.DimensionsCount() == 1 && gamma_shape.Dims(0) == 1;
+ const float *beta_data = getTensorData<float>(beta());
+ auto beta_shape = getTensorShape(beta());
+ bool single_beta = beta_shape.DimensionsCount() == 1 && beta_shape.Dims(0) == 1;
+ float *output_data = getTensorData<float>(output());
+ for (int32_t batch = 0; batch < batches; batch++)
+ {
+ for (int32_t channel = 0; channel < channels; channel++)
+ {
+ double sum = 0.0f;
+ double square_sum = 0.0f;
+ int32_t size = heights * widths;
+ for (int32_t height = 0; height < heights; height++)
+ {
+ for (int32_t width = 0; width < widths; width++)
+ {
+ double input_val = input_data[tflite::Offset(input_shape, batch, height, width, channel)];
+ sum += input_val;
+ square_sum += (input_val * input_val);
+ }
+ }
+ double mean = sum / size;
+ double var = square_sum / size - mean * mean;
+
+ double gamma = single_gamma ? gamma_data[0] : gamma_data[channel];
+ double beta = single_beta ? beta_data[0] : beta_data[channel];
+ double a = gamma / (std::sqrt(var + params().epsilon));
+ double b = -mean * a + beta;
+
+ for (int32_t height = 0; height < heights; height++)
+ {
+ for (int32_t width = 0; width < widths; width++)
+ {
+ double input_value =
+ input_data[tflite::Offset(output_shape, batch, height, width, channel)];
+ double output_value = input_value * a + b;
+ output_data[tflite::Offset(output_shape, batch, height, width, channel)] =
+ tflite::ActivationFunctionWithMinMax((float)output_value, activation_min,
+ activation_max);
+ }
+ }
+ }
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.h b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.h
new file mode 100644
index 000000000..a70a84e0a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_INSTANCENORM_H
+#define LUCI_INTERPRETER_KERNELS_INSTANCENORM_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class InstanceNorm : public KernelWithParams<InstanceNormParams>
+{
+public:
+ InstanceNorm(const Tensor *input, const Tensor *gamma, const Tensor *beta, Tensor *output,
+ const InstanceNormParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *gamma() const { return _inputs[1]; }
+ const Tensor *beta() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_INSTANCENORM_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp
new file mode 100644
index 000000000..04400c3c0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "kernels/InstanceNorm.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class InstanceNormTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(InstanceNormTest, Simple)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 2, 1}, {1, 1, 1, 1}, _memory_manager.get());
+ Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+ Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ InstanceNormParams params{};
+ params.epsilon = 0.1f;
+ params.activation = Activation::NONE;
+
+ InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({2, 2, 2, 2}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1}));
+}
+
+TEST_F(InstanceNormTest, Single_gamma_beta)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1}, _memory_manager.get());
+ Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+ Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ InstanceNormParams params{};
+ params.epsilon = 0.1f;
+ params.activation = Activation::NONE;
+
+ InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({2, 2, 2, 2}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 2}));
+}
+
+TEST_F(InstanceNormTest, Wrong_gamma_beta_dim_NEG)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1}, _memory_manager.get());
+ Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1, 1, 1}, _memory_manager.get());
+ Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({3}, {2, 2, 2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ InstanceNormParams params{};
+ params.epsilon = 0.1f;
+ params.activation = Activation::NONE;
+
+ InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.cpp
new file mode 100644
index 000000000..64222953f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/L2Normalize.h"
+#include "kernels/Utils.h"
+
+#include "PALL2Normalize.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+L2Normalize::L2Normalize(const Tensor *input, Tensor *output, const L2NormParams &params)
+ : KernelWithParams<L2NormParams>({input}, {output}, params)
+{
+}
+
+void L2Normalize::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= 4);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 ||
+ output()->element_type() == DataType::U8);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ if (output()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(output()->scale() == (1. / 128.));
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == 128);
+ }
+ LUCI_INTERPRETER_CHECK(params().activation == Activation::NONE);
+ output()->resize(input()->shape());
+}
+
+void L2Normalize::execute() const
+{
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+ eval<float>(0);
+ break;
+ case DataType::U8:
+ eval<uint8_t>(input()->zero_point());
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> void L2Normalize::eval(int32_t zero_point) const
+{
+ tflite::L2NormalizationParams op_params{};
+ op_params.input_zero_point = zero_point;
+ luci_interpreter_pal::L2Normalization(op_params, getTensorShape(input()),
+ getTensorData<T>(input()), getTensorShape(output()),
+ getTensorData<T>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.h b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.h
new file mode 100644
index 000000000..6c7dac698
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_L2NORMALIZE_H
+#define LUCI_INTERPRETER_KERNELS_L2NORMALIZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class L2Normalize : public KernelWithParams<L2NormParams>
+{
+public:
+ L2Normalize(const Tensor *input, Tensor *output, const L2NormParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void eval(int32_t zero_point) const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_L2NORMALIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp
new file mode 100644
index 000000000..6f960e8b4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "kernels/L2Normalize.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ L2NormParams params{};
+ params.activation = Activation::NONE;
+
+ L2Normalize kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::pair<float, int32_t> quant_param =
+ quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f,
+ std::max(input_data) > 0 ? std::max(input_data) : 0.f);
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 128., 128);
+
+ L2NormParams params{};
+ params.activation = Activation::NONE;
+
+ L2Normalize kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale()));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <typename T> class L2NormalizeTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(L2NormalizeTest, DataTypes);
+
+TYPED_TEST(L2NormalizeTest, Simple)
+{
+ Check<TypeParam>({1, 1, 1, 6}, {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1},
+ {-0.55, 0.3, 0.35, 0.6, -0.35, 0.05});
+}
+
+TEST(L2NormalizeTest, ActivationType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ L2NormParams params{};
+ params.activation = Activation::RELU6;
+
+ L2Normalize kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(L2NormalizeTest, InvalidOutputQuantParam_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 1, 1, 6}, 1. / 64., 127, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 64., 127);
+
+ L2NormParams params{};
+ params.activation = Activation::NONE;
+
+ L2Normalize kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.cpp
new file mode 100644
index 000000000..5a88808d5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/L2Pool2D.h"
+
+#include "kernels/Utils.h"
+
+#include "PALL2Pool2D.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+L2Pool2D::L2Pool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
+ : KernelWithParams<Pool2DParams>({input}, {output}, params)
+{
+}
+
+void L2Pool2D::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ int batches = input()->shape().dim(0);
+ int height = input()->shape().dim(1);
+ int width = input()->shape().dim(2);
+ int channels_out = input()->shape().dim(3);
+
+ // Matching GetWindowedOutputSize in TensorFlow.
+ auto padding = params().padding;
+ int out_width, out_height;
+ out_width = computeOutputSize(padding, width, params().filter_width, params().stride_width, 1);
+ out_height =
+ computeOutputSize(padding, height, params().filter_height, params().stride_height, 1);
+ _padding_width =
+ computePadding(params().stride_width, 1, width, params().filter_width, out_width);
+ _padding_height =
+ computePadding(params().stride_height, 1, height, params().filter_height, out_height);
+
+ LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32);
+ output()->resize({batches, out_height, out_width, channels_out});
+}
+
+void L2Pool2D::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ float activation_min, activation_max;
+ calculateActivationRange(params().activation, &activation_min, &activation_max);
+ tflite::PoolParams op_params;
+ op_params.stride_height = params().stride_height;
+ op_params.stride_width = params().stride_width;
+ op_params.filter_height = params().filter_height;
+ op_params.filter_width = params().filter_width;
+ op_params.padding_values.height = _padding_height;
+ op_params.padding_values.width = _padding_width;
+ op_params.float_activation_min = activation_min;
+ op_params.float_activation_max = activation_max;
+ luci_interpreter_pal::L2Pool(op_params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.h
new file mode 100644
index 000000000..d40f5f478
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_L2POOL2D_H
+#define LUCI_INTERPRETER_KERNELS_L2POOL2D_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <memory>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class L2Pool2D : public KernelWithParams<Pool2DParams>
+{
+public:
+ L2Pool2D(const Tensor *input, Tensor *output, const Pool2DParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ int32_t _padding_height = 0;
+ int32_t _padding_width = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_L2POOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp
new file mode 100644
index 000000000..7245456cb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp
@@ -0,0 +1,291 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/L2Pool2D.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class L2Pool2DTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(L2Pool2DTest, FloatNone)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{3.5, 6.5};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  // TODO: also check the shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatRelu)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ -1, -6, 2, 4, //
+ -3, -2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::RELU;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{3.53553, 6.5};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  // TODO: also check the shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatRelu1)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ -0.1, -0.6, 2, 4, //
+ -0.3, -0.2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::RELU_N1_TO_1;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.353553, 1.0};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  // TODO: also check the shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatRelu6)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ -0.1, -0.6, 2, 4, //
+ -0.3, -0.2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::RELU6;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.353553, 6.0};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  // TODO: also check the shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatPaddingSame)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::SAME;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{3.5, 6.5};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  // TODO: also check the shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatPaddingSameStride)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::SAME;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{3.5, 6.0, 6.5, 5.70088, 2.54951, 7.2111, 8.63134, 7.0};
+ // NOTE with NEON+ruy, error is #1=-1.14441e-05, #6=-1.81198e-05
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, 1.0e-4f));
+  // TODO: also check the shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatPaddingValidStride)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{3.5, 6.0, 6.5};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  // TODO: also check the shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, InvalidInputShape_NEG)
+{
+ Shape input_shape{1, 2, 4};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(L2Pool2DTest, InvalidInputOutputType_NEG)
+{
+  Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.cpp
new file mode 100644
index 000000000..3833a55e8
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LeakyRelu.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h>
+
+#include "PALLeakyRelu.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+LeakyRelu::LeakyRelu(const Tensor *input, Tensor *output, const LeakyReluParams &params)
+ : KernelWithParams<LeakyReluParams>({input}, {output}, params)
+{
+}
+
+void LeakyRelu::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ if (input()->element_type() == DataType::U8)
+ {
+ double alpha_multiplier = input()->scale() * params().alpha / output()->scale();
+ quantizeMultiplier(alpha_multiplier, &_output_multiplier_alpha, &_output_shift_alpha);
+ double identity_multiplier = input()->scale() / output()->scale();
+ quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
+ }
+ output()->resize(input()->shape());
+}
+
+void LeakyRelu::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void LeakyRelu::evalFloat() const
+{
+ tflite::LeakyReluParams op_params{};
+ op_params.alpha = params().alpha;
+ luci_interpreter_pal::LeakyRelu(op_params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void LeakyRelu::evalQuantized() const
+{
+ tflite::LeakyReluParams op_params{};
+ op_params.input_offset = input()->zero_point();
+ op_params.output_offset = output()->zero_point();
+ op_params.output_multiplier_alpha = _output_multiplier_alpha;
+ op_params.output_shift_alpha = _output_shift_alpha;
+ op_params.output_multiplier_identity = _output_multiplier_identity;
+ op_params.output_shift_identity = _output_shift_identity;
+
+ tflite::reference_ops::QuantizeLeakyRelu(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.h b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.h
new file mode 100644
index 000000000..e66f404df
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LEAKYRELU_H
+#define LUCI_INTERPRETER_KERNELS_LEAKYRELU_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LeakyRelu : public KernelWithParams<LeakyReluParams>
+{
+public:
+ LeakyRelu(const Tensor *input, Tensor *output, const LeakyReluParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _output_multiplier_alpha = 0;
+ int _output_shift_alpha = 0;
+ int32_t _output_multiplier_identity = 0;
+ int _output_shift_identity = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LEAKYRELU_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp
new file mode 100644
index 000000000..0f6263b57
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LeakyRelu.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data,
+ float alpha)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ LeakyReluParams params{};
+ params.alpha = alpha;
+
+ LeakyRelu kernel(&input_tensor, &output_tensor, params);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<float> output_data, float alpha)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ const float quantized_tolerance = getTolerance(-8, 127.f / 16.f, 255);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-8, 127.f / 16.f);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ LeakyReluParams params{};
+ params.alpha = alpha;
+
+ LeakyRelu kernel(&input_tensor, &output_tensor, params);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, quantized_tolerance));
+}
+
+template <typename T> class LeakReluTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(LeakReluTest, DataTypes);
+
+TYPED_TEST(LeakReluTest, Simple)
+{
+ Check<TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3},
+ /*input_data=*/
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ },
+ /*output_data=*/
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -0.5f, -1.0f, // Row 2
+ },
+ /*alpha=*/0.5f);
+
+ SUCCEED();
+}
+
+TEST(LeakReluTest, InvalidInputOutputType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3},
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ },
+ memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ LeakyReluParams params{};
+ params.alpha = 0.5f;
+
+ LeakyRelu kernel(&input_tensor, &output_tensor, params);
+
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Less.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Less.cpp
new file mode 100644
index 000000000..8d26ff297
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Less.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Less.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Less::Less(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+void Less::configure()
+{
+ LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+ if (x()->element_type() == DataType::U8)
+ {
+ quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+ quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+ }
+ output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void Less::execute() const
+{
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Less::evalFloat() const
+{
+ const auto x_data = getTensorData<float>(x());
+ const auto y_data = getTensorData<float>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLess(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::Less(op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+}
+
+template <typename T> void Less::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLessNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::LessNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+void Less::evalQuantized() const
+{
+ const auto x_data = getTensorData<uint8_t>(x());
+ const auto y_data = getTensorData<uint8_t>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.left_shift = 8;
+ op_params.input1_offset = -x()->zero_point(); // Note the '-'
+ op_params.input1_shift = _x_shift;
+ op_params.input1_multiplier = _x_multiplier;
+ op_params.input2_offset = -y()->zero_point(); // Note the '-'
+ op_params.input2_shift = _y_shift;
+ op_params.input2_multiplier = _y_multiplier;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLessWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::LessWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Less.h b/compiler/luci-micro/luci-interpreter/src/kernels/Less.h
new file mode 100644
index 000000000..e27bb689c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Less.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LESS_H
+#define LUCI_INTERPRETER_KERNELS_LESS_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Less : public Kernel
+{
+public:
+ Less(const Tensor *x, const Tensor *y, Tensor *output);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _x_multiplier = 0;
+ int _x_shift = 0;
+ int32_t _y_multiplier = 0;
+ int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LESS_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Less.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Less.test.cpp
new file mode 100644
index 000000000..8c5963363
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Less.test.cpp
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Less.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LessTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LessTest, FloatSimple)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, false, // Row 1
+ false, false, true, // Row 2
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(LessTest, FloatBroardcast)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, false, // Row 1
+ false, true, true, // Row 2
+ true, true, false, // Row 3
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+ std::vector<bool> ref_output_data{true, false, false};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -4, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value + 1, -2, max_value - 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, true, // Row 1
+ false, false, false, // Row 2
+ false, true, true, // Row 3
+ true, false, false, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(LessTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(LessTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+TEST_F(LessTest, Uint8Quantized)
+{
+ std::vector<float> x_data{
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.6, 0.55, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, false, false, // Row 1
+ false, true, false, true, // Row 2
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessTest, Uint8QuantizedRescale)
+{
+ std::vector<float> x_data{
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.6, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, false, false, // Row 1
+ false, true, false, true, // Row 2
+ };
+
+ std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
+
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessTest, Uint8QuantizedBroadcast)
+{
+ std::vector<float> x_data{
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ -1, 0.05, 0, 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, true, // Row 1
+ false, false, false, true, // Row 2
+ false, true, false, true, // Row 3
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessTest, Input_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessTest, Input_Output_Type_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.cpp
new file mode 100644
index 000000000..b474bc47a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LessEqual.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+LessEqual::LessEqual(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+void LessEqual::configure()
+{
+ LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+ if (x()->element_type() == DataType::U8)
+ {
+ quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+ quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+ }
+ output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void LessEqual::execute() const
+{
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void LessEqual::evalFloat() const
+{
+ const auto x_data = getTensorData<float>(x());
+ const auto y_data = getTensorData<float>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLessEqual(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::LessEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+ y_data, getTensorShape(output()), output_data);
+ }
+}
+
+template <typename T> void LessEqual::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLessEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::LessEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+void LessEqual::evalQuantized() const
+{
+ const auto x_data = getTensorData<uint8_t>(x());
+ const auto y_data = getTensorData<uint8_t>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.left_shift = 8;
+ op_params.input1_offset = -x()->zero_point(); // Note the '-'
+ op_params.input1_shift = _x_shift;
+ op_params.input1_multiplier = _x_multiplier;
+ op_params.input2_offset = -y()->zero_point(); // Note the '-'
+ op_params.input2_shift = _y_shift;
+ op_params.input2_multiplier = _y_multiplier;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLessEqualWithScaling(
+ op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+ else
+ {
+ tflite::reference_ops::LessEqualWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.h b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.h
new file mode 100644
index 000000000..f82ea90d4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H
+#define LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LessEqual : public Kernel
+{
+public:
+ LessEqual(const Tensor *x, const Tensor *y, Tensor *output);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _x_multiplier = 0;
+ int _x_shift = 0;
+ int32_t _y_multiplier = 0;
+ int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.test.cpp
new file mode 100644
index 000000000..b2e2fa7a1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.test.cpp
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LessEqual.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LessEqualTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LessEqualTest, FloatSimple)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, true, false, // Row 1
+ false, true, true, // Row 2
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(LessEqualTest, FloatBroardcast)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, true, false, // Row 1
+ false, true, true, // Row 2
+ true, true, false, // Row 3
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+ std::vector<bool> ref_output_data{true, false, true};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -4, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value + 1, -2, max_value - 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, true, // Row 1
+ false, false, false, // Row 2
+ false, true, true, // Row 3
+ true, true, false, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(LessEqualTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(LessEqualTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+TEST_F(LessEqualTest, Uint8Quantized)
+{
+ std::vector<float> x_data{
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.6, 0.55, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, true, false, false, // Row 1
+ false, true, false, true, // Row 2
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessEqualTest, Uint8QuantizedRescale)
+{
+ std::vector<float> x_data{
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.6, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, true, false, false, // Row 1
+ false, true, false, true, // Row 2
+ };
+
+ std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
+
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessEqualTest, Uint8QuantizedBroadcast)
+{
+ std::vector<float> x_data{
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ -1, 0.05, 0, 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, true, // Row 1
+ false, false, true, true, // Row 2
+ false, true, false, true, // Row 3
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessEqualTest, Input_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessEqualTest, Input_Output_Type_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessEqualTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessEqualTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessEqualTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp
new file mode 100644
index 000000000..a2bf442b0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LocalResponseNormalization.h"
+
+#include "kernels/Utils.h"
+
+#include "PALLocalResponseNormalization.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+LocalResponseNormalization::LocalResponseNormalization(
+ const Tensor *input, Tensor *output, const LocalResponseNormalizationParams &params)
+ : KernelWithParams<LocalResponseNormalizationParams>({input}, {output}, params)
+{
+}
+
+void LocalResponseNormalization::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ output()->resize(input()->shape());
+}
+
+void LocalResponseNormalization::execute() const
+{
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::LocalResponseNormalizationParams op_params;
+ op_params.range = params().radius;
+ op_params.bias = params().bias;
+ op_params.alpha = params().alpha;
+ op_params.beta = params().beta;
+ luci_interpreter_pal::LocalResponseNormalization(
+ op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h
new file mode 100644
index 000000000..60408a104
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOCALRESPONSENORMALIZATION_H
+#define LUCI_INTERPRETER_KERNELS_LOCALRESPONSENORMALIZATION_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LocalResponseNormalization : public KernelWithParams<LocalResponseNormalizationParams>
+{
+public:
+ LocalResponseNormalization(const Tensor *input, Tensor *output,
+ const LocalResponseNormalizationParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOCALRESPONSENORMALIZATION_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp
new file mode 100644
index 000000000..4a9d4739f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LocalResponseNormalization.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LocalResponseNormalizationTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LocalResponseNormalizationTest, SameAsL2Norm)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 0.0;
+ params.alpha = 1.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05}));
+}
+
+TEST_F(LocalResponseNormalizationTest, WithAlpha)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 0.0;
+ params.alpha = 4.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({-0.275, 0.15, 0.175, 0.3, -0.175, 0.025}));
+}
+
+TEST_F(LocalResponseNormalizationTest, WithBias)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 9.0;
+ params.alpha = 4.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({-0.22, 0.12, 0.14, 0.24, -0.14, 0.02}));
+}
+
+TEST_F(LocalResponseNormalizationTest, SmallRadius)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 2;
+ params.bias = 9.0;
+ params.alpha = 4.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({-0.264926, 0.125109, 0.140112, 0.267261, -0.161788, 0.0244266}));
+}
+
+TEST_F(LocalResponseNormalizationTest, InvalidInputDimension_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 0.0;
+ params.alpha = 1.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LocalResponseNormalizationTest, InvalidInputOutputType_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 0.0;
+ params.alpha = 1.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.cpp
new file mode 100644
index 000000000..79c315338
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogSoftmax.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/log_softmax.h>
+
+#include "PALLogSoftmax.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+LogSoftmax::LogSoftmax(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void LogSoftmax::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ if (input()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(output()->scale() == 16. / 256);
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == 255);
+
+ tflite::SoftmaxParams params{};
+
+ params.table = _table;
+ params.beta = 1.0;
+ luci_interpreter_pal::PopulateSoftmaxLookupTable(&params, input()->scale(), params.beta);
+ }
+ output()->resize(input()->shape());
+}
+
+void LogSoftmax::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void LogSoftmax::evalFloat() const
+{
+ tflite::SoftmaxParams params{};
+ tflite::reference_ops::LogSoftmax(params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void LogSoftmax::evalQuantized() const
+{
+ const auto input_shape = getTensorShape(input());
+ const auto output_shape = getTensorShape(output());
+ const auto input_scale = input()->scale();
+ uint8_t *output_data = getTensorData<uint8_t>(output());
+ const uint8_t *input_data = getTensorData<uint8_t>(input());
+ const float beta = 1.0;
+
+ tflite::SoftmaxParams params{};
+
+ params.table = const_cast<float *>(_table);
+ params.zero_point = output()->zero_point();
+ params.scale = output()->scale();
+
+ luci_interpreter_pal::InitializeParams(&params, input_scale, beta);
+ luci_interpreter_pal::LogSoftmax(params, input_scale, input_shape, input_data, output_shape,
+ output_data);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.h b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.h
new file mode 100644
index 000000000..18477fbe3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H
+#define LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LogSoftmax : public Kernel
+{
+public:
+ LogSoftmax(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+
+ float _table[256];
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp
new file mode 100644
index 000000000..50dcd5c28
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogSoftmax.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LogSoftmaxTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogSoftmaxTest, Float)
+{
+ Shape input_shape{2, 4};
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LogSoftmax kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ -4.14297, -10.14297, -2.14297, -.142971, //
+ -7.00104, -12.00104, -.00104087, -9.00104, //
+ };
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(LogSoftmaxTest, Uint8)
+{
+ float kMin = -10;
+ float kMax = 10;
+ float kLogSoftmaxQuantizedTolerance = 16. / 256;
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(kMin, kMax);
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor = makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255);
+
+ LogSoftmax kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ -4.14297, -10.14297, -2.14297, -.142971, //
+ -7.00104, -12.00104, -.00104087, -9.00104, //
+ };
+ std::vector<int32_t> ref_output_shape{2, 4};
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kLogSoftmaxQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({189, 93, 221, 253, 142, 63, 255, 111}));
+}
+
+TEST_F(LogSoftmaxTest, InvalidInputOutputType_NEG)
+{
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 4}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255);
+
+ LogSoftmax kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LogSoftmaxTest, InvalidOutputQuantParam_NEG)
+{
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-10, 10);
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor = makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 20. / 256, 255);
+
+ LogSoftmax kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.cpp
new file mode 100644
index 000000000..8e7263231
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalAnd.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+LogicalAnd::LogicalAnd(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void LogicalAnd::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void LogicalAnd::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::BOOL:
+ evalLogicalAnd();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+inline void LogicalAnd::evalLogicalAnd() const
+{
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<bool>(input1()),
+ getTensorShape(input2()), getTensorData<bool>(input2()),
+ getTensorShape(output()), getTensorData<bool>(output()),
+ [](bool x, bool y) { return x && y; });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.h b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.h
new file mode 100644
index 000000000..46b889986
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOGICALAND_H
+#define LUCI_INTERPRETER_KERNELS_LOGICALAND_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LogicalAnd : public Kernel
+{
+public:
+ LogicalAnd(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ inline void evalLogicalAnd() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOGICALAND_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp
new file mode 100644
index 000000000..21b7951e0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalAnd.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LogicalAndTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogicalAndTest, Basic)
+{
+ Shape input_shape{1, 1, 1, 4};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true}, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::BOOL>(input_shape, {true, false, true, false}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalAnd kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor),
+ ::testing::ElementsAre(true, false, false, false));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
+}
+
+TEST_F(LogicalAndTest, Broadcast)
+{
+ Tensor input_tensor1 = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+ _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {true}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalAnd kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor),
+ ::testing::ElementsAre(true, false, false, true));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
+}
+
+TEST_F(LogicalAndTest, MismatchInputType_NEG)
+{
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ LogicalAnd kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LogicalAndTest, InputTypeInvalid_NEG)
+{
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalAnd kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.cpp
new file mode 100644
index 000000000..65ab961aa
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalNot.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+LogicalNot::LogicalNot(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void LogicalNot::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ output()->resize(input()->shape());
+}
+
+void LogicalNot::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::BOOL:
+ evalLogicalNot();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+inline void LogicalNot::evalLogicalNot() const
+{
+ const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+ bool *output_data = getTensorData<bool>(output());
+ const bool *input_data = getTensorData<bool>(input());
+ for (int i = 0; i < size; ++i)
+ {
+ output_data[i] = !input_data[i];
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.h b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.h
new file mode 100644
index 000000000..1608fafa5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOGICALNOT_H
+#define LUCI_INTERPRETER_KERNELS_LOGICALNOT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LogicalNot : public Kernel
+{
+public:
+ LogicalNot(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ inline void evalLogicalNot() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOGICALNOT_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp
new file mode 100644
index 000000000..3cbf27f6b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalNot.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LogicalNotTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogicalNotTest, Basic)
+{
+ Shape input_shape{1, 1, 1, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalNot kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor),
+ ::testing::ElementsAre(false, true, true, false));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
+}
+
+TEST_F(LogicalNotTest, OutputTypeInvalid_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ LogicalNot kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LogicalNotTest, InputTypeInvalid_NEG)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalNot kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.cpp
new file mode 100644
index 000000000..f289ca64f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalOr.h"
+
+#include "kernels/Utils.h"
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+LogicalOr::LogicalOr(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void LogicalOr::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == DataType::BOOL);
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void LogicalOr::execute() const
+{
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<bool>(input1()),
+ getTensorShape(input2()), getTensorData<bool>(input2()),
+ getTensorShape(output()), getTensorData<bool>(output()),
+ [](bool x, bool y) { return x || y; });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.h b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.h
new file mode 100644
index 000000000..88606483f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOGICALOR_H
+#define LUCI_INTERPRETER_KERNELS_LOGICALOR_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LogicalOr : public Kernel
+{
+public:
+ LogicalOr(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOGICALOR_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp
new file mode 100644
index 000000000..d65a69a5e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalOr.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LogicalOrTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogicalOrTest, Basic)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+ _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, true, false},
+ _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor),
+ ::testing::ElementsAre(true, false, true, true));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
+}
+
+TEST_F(LogicalOrTest, Broadcast)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+ _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor),
+ ::testing::ElementsAre(true, false, false, true));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
+}
+
+TEST_F(LogicalOrTest, MismatchInputType_NEG)
+{
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LogicalOrTest, InputTypeInvalid_NEG)
+{
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0}, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.cpp
new file mode 100644
index 000000000..58e4f185d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Logistic.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/logistic.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Logistic::Logistic(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Logistic::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ if (input()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(output()->scale() == 1. / 256);
+ populateLookupTable();
+ }
+ output()->resize(input()->shape());
+}
+
+void Logistic::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Logistic::evalFloat() const
+{
+ tflite::reference_ops::Logistic(getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void Logistic::evalQuantized() const
+{
+ const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+ uint8_t *output_data = getTensorData<uint8_t>(output());
+ const uint8_t *input_data = getTensorData<uint8_t>(input());
+ for (int i = 0; i < size; ++i)
+ {
+ output_data[i] = getTableValue(input_data[i]);
+ }
+}
+
+void Logistic::populateLookupTable()
+{
+ const auto input_scale = static_cast<double>(input()->scale());
+ const auto input_zero_point = static_cast<int32_t>(input()->zero_point());
+ const auto output_scale = static_cast<double>(output()->scale());
+ const auto output_zero_point = static_cast<int32_t>(output()->zero_point());
+ const float inverse_scale = 1 / output_scale;
+ int32_t maxval = std::numeric_limits<uint8_t>::max();
+ int32_t minval = std::numeric_limits<uint8_t>::min();
+ for (int32_t val = minval; val <= maxval; ++val)
+ {
+ const float dequantized = input_scale * (val - input_zero_point);
+ const float transformed = 1.0f / (1.0f + std::exp(-dequantized));
+ const float rescaled = std::round(transformed * inverse_scale);
+ const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
+ setTableValue(static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)),
+ static_cast<uint8_t>(val));
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.h b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.h
new file mode 100644
index 000000000..31de6adf0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOGISTIC_H
+#define LUCI_INTERPRETER_KERNELS_LOGISTIC_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Logistic : public Kernel
+{
+public:
+ Logistic(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void populateLookupTable();
+ void setTableValue(uint8_t value, uint8_t idx) { _table[idx] = value; };
+ uint8_t getTableValue(uint8_t idx) const { return _table[idx]; };
+
+private:
+ uint8_t _table[256]{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOGISTIC_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.test.cpp
new file mode 100644
index 000000000..5a1ea669c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.test.cpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Logistic.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<getElementType<T>()>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(getElementType<T>());
+
+ Logistic kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::pair<float, int32_t> input_quant_param =
+ quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0);
+
+ Logistic kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale() * 2));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <typename T> class LogisticTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(LogisticTest, DataTypes);
+
+TYPED_TEST(LogisticTest, Simple)
+{
+ Check<TypeParam>(
+ {89}, {89},
+ {-10.0000000000, -9.7727272727, -9.5454545455, -9.3181818182, -9.0909090909, -8.8636363636,
+ -8.6363636364, -8.4090909091, -8.1818181818, -7.9545454545, -7.7272727273, -7.5000000000,
+ -7.2727272727, -7.0454545455, -6.8181818182, -6.5909090909, -6.3636363636, -6.1363636364,
+ -5.9090909091, -5.6818181818, -5.4545454545, -5.2272727273, -5.0000000000, -4.7727272727,
+ -4.5454545455, -4.3181818182, -4.0909090909, -3.8636363636, -3.6363636364, -3.4090909091,
+ -3.1818181818, -2.9545454545, -2.7272727273, -2.5000000000, -2.2727272727, -2.0454545455,
+ -1.8181818182, -1.5909090909, -1.3636363636, -1.1363636364, -0.9090909091, -0.6818181818,
+ -0.4545454545, -0.2272727273, 0.0000000000, 0.2272727273, 0.4545454545, 0.6818181818,
+ 0.9090909091, 1.1363636364, 1.3636363636, 1.5909090909, 1.8181818182, 2.0454545455,
+ 2.2727272727, 2.5000000000, 2.7272727273, 2.9545454545, 3.1818181818, 3.4090909091,
+ 3.6363636364, 3.8636363636, 4.0909090909, 4.3181818182, 4.5454545455, 4.7727272727,
+ 5.0000000000, 5.2272727273, 5.4545454545, 5.6818181818, 5.9090909091, 6.1363636364,
+ 6.3636363636, 6.5909090909, 6.8181818182, 7.0454545455, 7.2727272727, 7.5000000000,
+ 7.7272727273, 7.9545454545, 8.1818181818, 8.4090909091, 8.6363636364, 8.8636363636,
+ 9.0909090909, 9.3181818182, 9.5454545455, 9.7727272727, 10.0000000000},
+ {0.0000453979, 0.0000569815, 0.0000715205, 0.0000897689, 0.0001126729, 0.0001414198,
+ 0.0001774998, 0.0002227827, 0.0002796147, 0.0003509396, 0.0004404502, 0.0005527786,
+ 0.0006937345, 0.0008706021, 0.0010925128, 0.0013709094, 0.0017201256, 0.0021581065,
+ 0.0027073042, 0.0033957870, 0.0042586071, 0.0053394826, 0.0066928509, 0.0083863576,
+ 0.0105038445, 0.0131488902, 0.0164489307, 0.0205599431, 0.0256715863, 0.0320125562,
+ 0.0398556989, 0.0495221198, 0.0613831074, 0.0758581800, 0.0934070047, 0.1145124805,
+ 0.1396521834, 0.1692560327, 0.2036499335, 0.2429886272, 0.2871859014, 0.3358556241,
+ 0.3882805886, 0.4434251301, 0.5000000000, 0.5565748699, 0.6117194114, 0.6641443759,
+ 0.7128140986, 0.7570113728, 0.7963500665, 0.8307439673, 0.8603478166, 0.8854875195,
+ 0.9065929953, 0.9241418200, 0.9386168926, 0.9504778802, 0.9601443011, 0.9679874438,
+ 0.9743284137, 0.9794400569, 0.9835510693, 0.9868511098, 0.9894961555, 0.9916136424,
+ 0.9933071491, 0.9946605174, 0.9957413929, 0.9966042130, 0.9972926958, 0.9978418935,
+ 0.9982798744, 0.9986290906, 0.9989074872, 0.9991293979, 0.9993062655, 0.9994472214,
+ 0.9995595498, 0.9996490604, 0.9997203853, 0.9997772173, 0.9998225002, 0.9998585802,
+ 0.9998873271, 0.9999102311, 0.9999284795, 0.9999430185, 0.9999546021});
+}
+
+TEST(LogisticTest, IvalidInputOutputType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape = {1};
+ std::vector<float> input_data{10};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0);
+
+ Logistic kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(LogisticTest, IvalidQuantParam_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Shape input_shape = {2};
+ std::vector<float> input_data{-10, 10};
+ std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-10, 10);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 255, 0);
+
+ Logistic kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.cpp
new file mode 100644
index 000000000..8d9760ff2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.cpp
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/MaxPool2D.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+MaxPool2D::MaxPool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
+ : KernelWithParams<Pool2DParams>({input}, {output}, params)
+{
+}
+
+void MaxPool2D::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ assert(input()->shape().num_dims() == 4);
+ const Shape &input_shape = input()->shape();
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t depth = input_shape.dim(3);
+
+ const int32_t output_height =
+ computeOutputSize(_params.padding, input_height, _params.filter_height, _params.stride_height);
+ const int32_t output_width =
+ computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
+
+ _padding_height =
+ computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
+ _padding_width =
+ computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
+
+ output()->resize({batches, output_height, output_width, depth});
+ if (input()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
+ }
+ else if (input()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+ }
+}
+
+void MaxPool2D::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S16:
+ evalSInt16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void MaxPool2D::evalFloat() const
+{
+ float activation_min{};
+ float activation_max{};
+ calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+ tflite::PoolParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.filter_height = _params.filter_height;
+ params.filter_width = _params.filter_width;
+ params.float_activation_min = activation_min;
+ params.float_activation_max = activation_max;
+
+ tflite::reference_ops::MaxPool(params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void MaxPool2D::evalQuantized() const
+{
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::PoolParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.filter_height = _params.filter_height;
+ params.filter_width = _params.filter_width;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ tflite::reference_ops::MaxPool(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+void MaxPool2D::evalSInt16() const
+{
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::PoolParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.filter_height = _params.filter_height;
+ params.filter_width = _params.filter_width;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ tflite::reference_integer_ops::MaxPool(
+ params, getTensorShape(input()), getTensorData<int16_t>(input()), //
+ getTensorShape(output()), getTensorData<int16_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.h
new file mode 100644
index 000000000..bb7666305
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H
+#define LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class MaxPool2D : public KernelWithParams<Pool2DParams>
+{
+public:
+ MaxPool2D(const Tensor *input, Tensor *output, const Pool2DParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void evalSInt16() const;
+
+private:
+ int32_t _padding_height{};
+ int32_t _padding_width{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp
new file mode 100644
index 000000000..44f2a222f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/MaxPool2D.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MaxPool2DTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MaxPool2DTest, Float)
+{
+ Shape input_shape{1, 3, 5, 1};
+ std::vector<float> input_data{
+ 1, -1, 0, -2, 2, //
+ -7, -6, -5, -4, -3, //
+ 5, 4, 3, 6, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ MaxPool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 1, 2, //
+ 5, 6, //
+ };
+ std::initializer_list<int32_t> ref_output_shape{1, 2, 2, 1};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MaxPool2DTest, Uint8)
+{
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375, 15.9375);
+ std::vector<float> input_data{
+ 0, -6, 12, 4, //
+ -3, -2, 10, 7, //
+ };
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ MaxPool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.0, 6.0};
+ std::initializer_list<int32_t> ref_output_shape{1, 1, 2, 1};
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MaxPool2DTest, SInt16)
+{
+ Shape input_shape{1, 3, 5, 1};
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<float> input_data{
+ 1, -1, 0, -2, 2, //
+ -7, -6, -5, -4, -3, //
+ 5, 4, 3, 6, 7, //
+ };
+ std::vector<float> ref_output_data{
+ 1, 2, //
+ 5, 6, //
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, 0.2, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ MaxPool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.cpp
new file mode 100644
index 000000000..b102b5e27
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Maximum.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Maximum::Maximum(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void Maximum::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type())
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type())
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Maximum::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalMaximum<float>();
+ break;
+ case DataType::U8:
+ evalMaximum<uint8_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> inline void Maximum::evalMaximum() const
+{
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()),
+ [](T x, T y) { return std::max(x, y); });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.h b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.h
new file mode 100644
index 000000000..3c99e69c7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MAXIMUM_H
+#define LUCI_INTERPRETER_KERNELS_MAXIMUM_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Kernel computing the element-wise maximum of two tensors with numpy-style
// broadcasting. Supported element types at execute() time: FLOAT32 and U8.
class Maximum : public Kernel
{
public:
  Maximum(const Tensor *input1, const Tensor *input2, Tensor *output);

  // Accessors for the tensors registered in the constructor.
  const Tensor *input1() const { return _inputs[0]; }
  const Tensor *input2() const { return _inputs[1]; }
  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;

private:
  // Typed evaluation shared by all supported element types.
  template <typename T> inline void evalMaximum() const;
};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MAXIMUM_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.test.cpp
new file mode 100644
index 000000000..e4a505b03
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.test.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Maximum.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
// Test fixture: provides a fresh TestMemoryManager per test so each test owns
// its tensor allocations independently.
class MaximumTest : public ::testing::Test
{
protected:
  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }

  std::unique_ptr<IMemoryManager> _memory_manager;
};
+
// Element-wise max of two equally shaped float tensors (no actual broadcasting).
TEST_F(MaximumTest, Float)
{
  Shape input_shape{3, 1, 2};
  std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
  std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
  Tensor input_tensor1 =
    makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, _memory_manager.get());
  Tensor input_tensor2 =
    makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, _memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);

  Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor);
  kernel.configure();
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  // Expected: pairwise max of input_data1/input_data2.
  std::vector<float> ref_output_data{1.0, 0.0, 1.0, 12.0, -2.0, -1.43};
  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
}
+
+TEST_F(MaximumTest, Uint8)
+{
+ Shape input_shape{3, 1, 2};
+ std::vector<uint8_t> input_data1{1, 0, 2, 11, 2, 23};
+ std::vector<uint8_t> input_data2{0, 0, 1, 12, 255, 1};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::U8>(input_shape, input_data1, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::U8>(input_shape, input_data2, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<int32_t> ref_output_shape{2, 4};
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({1, 0, 2, 12, 255, 23}));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mean.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.cpp
new file mode 100644
index 000000000..8e65e0d6d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.cpp
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Mean.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reduce.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+static void resolveAxes(const int32_t *axes_data, int num_axes, tflite::MeanParams *params)
+{
+ params->axis_count = num_axes;
+ for (int i = 0; i < num_axes; ++i)
+ {
+ params->axis[i] = static_cast<int16>(axes_data[i]);
+ }
+ for (int i = num_axes; i < 4; ++i)
+ {
+ params->axis[i] = 1;
+ }
+}
+
// Returns the number of axes that will be reduced, counting each distinct axis
// once (negative axes are normalized before the duplicate check).
static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
{
  const auto normalize = [input_num_dims](int32_t axis) {
    return axis >= 0 ? axis : axis + input_num_dims;
  };

  int reduction_count = num_axes;
  for (int i = 0; i < num_axes; ++i)
  {
    const int current = normalize(axes_data[i]);
    assert(current >= 0 && current < input_num_dims);
    bool is_duplicate = false;
    for (int j = 0; j < i && !is_duplicate; ++j)
      is_duplicate = (normalize(axes_data[j]) == current);
    if (is_duplicate)
      --reduction_count;
  }
  return reduction_count;
}
+
+static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes,
+ bool keep_dims)
+{
+ int input_num_dims = input_shape.num_dims();
+ if (input_num_dims == 0)
+ {
+ return Shape(0);
+ }
+
+ if (keep_dims)
+ {
+ Shape output_shape(input_num_dims);
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ bool is_axis = false;
+ for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+ {
+ if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+ {
+ is_axis = true;
+ break;
+ }
+ }
+ if (is_axis)
+ {
+ output_shape.dim(idx) = 1;
+ }
+ else
+ {
+ output_shape.dim(idx) = input_shape.dim(idx);
+ }
+ }
+ return output_shape;
+ }
+ else
+ {
+ int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims);
+ Shape output_shape(input_num_dims - num_reduce_axes);
+ int num_skip_axes = 0;
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ bool is_axis = false;
+ for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+ {
+ if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+ {
+ ++num_skip_axes;
+ is_axis = true;
+ break;
+ }
+ }
+ if (!is_axis)
+ {
+ output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx);
+ }
+ }
+ return output_shape;
+ }
+}
+
// Mean reduction kernel. Besides the real output, three scratch tensors
// (temp_index, resolved_axes, temp_sum) are registered as outputs so the
// runtime can allocate working memory for the generic reduction path.
Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
           Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params)
  : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes, temp_sum},
                                    params)
{
}
+
+void Mean::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32);
+ if (input()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+ }
+
+ const Shape &input_shape = input()->shape();
+ int input_num_dims = input_shape.num_dims();
+
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ int num_axes = axes()->shape().num_elements();
+ assert(num_axes <= 4);
+
+ Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims);
+ output()->resize(output_shape);
+
+ tflite::MeanParams params{};
+ resolveAxes(axes_data, num_axes, &params);
+ _need_temporaries = !(
+ _params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
+ ((params.axis[0] == 1 && params.axis[1] == 2) || (params.axis[0] == 2 && params.axis[1] == 1)));
+ if (_need_temporaries)
+ {
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+ auto temp_sum = getOutputTensors()[3];
+
+ temp_index->resize(Shape(input_num_dims));
+ resolved_axes->resize(Shape(num_axes));
+ temp_sum->resize(output()->shape());
+ }
+ else
+ {
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+ auto temp_sum = getOutputTensors()[3];
+
+ temp_index->set_allocatable(false);
+ resolved_axes->set_allocatable(false);
+ temp_sum->set_allocatable(false);
+ }
+}
+
+void Mean::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
// Float Mean: uses the fast tflite 4D (height/width) reference kernel when the
// reduction matches that special case, otherwise the generic reducer that
// works through the temp_index/resolved_axes/temp_sum scratch tensors.
void Mean::evalFloat() const
{
  const Shape &input_shape = input()->shape();
  int input_num_dims = input_shape.num_dims();
  const auto *axes_data = getTensorData<int32_t>(axes());
  int num_axes = axes()->shape().num_elements();

  tflite::MeanParams params{};
  resolveAxes(axes_data, num_axes, &params);

  // Scratch tensors registered as extra outputs (see constructor).
  auto temp_index = getOutputTensors()[1];
  auto resolved_axes = getOutputTensors()[2];
  auto temp_sum = getOutputTensors()[3];

  // Defer to specialized implementation for 4D Mean across axes 1 & 2.
  if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
      ((params.axis[0] == 1 && params.axis[1] == 2) ||
       (params.axis[0] == 2 && params.axis[1] == 1)))
  {
    tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<float>(input()),
                                getTensorShape(output()), getTensorData<float>(output()));
  }
  else
  {
    // Generic N-D reduction; sums are accumulated in temp_sum before averaging.
    tflite::reference_ops::Mean(getTensorData<float>(input()), getTensorShape(input()).DimsData(),
                                input()->shape().num_dims(), getTensorData<float>(output()),
                                getTensorShape(output()).DimsData(), output()->shape().num_dims(),
                                axes_data, num_axes, _params.keep_dims,
                                getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
                                getTensorData<float>(temp_sum));
  }
}
+
// U8 Mean with three paths: the specialized 4D kernel, a requantization-free
// generic path when input and output share scale/zero-point, and the fully
// general QuantizedMeanOrSum otherwise.
void Mean::evalQuantized() const
{
  const Shape &input_shape = input()->shape();
  int input_num_dims = input_shape.num_dims();
  const auto *axes_data = getTensorData<int32_t>(axes());
  int num_axes = axes()->shape().num_elements();

  tflite::MeanParams params{};
  resolveAxes(axes_data, num_axes, &params);

  // Scratch tensors registered as extra outputs (see constructor).
  auto temp_index = getOutputTensors()[1];
  auto resolved_axes = getOutputTensors()[2];
  auto temp_sum = getOutputTensors()[3];

  // Defer to specialized implementation for 4D Mean across axes 1 & 2.
  if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
      ((params.axis[0] == 1 && params.axis[1] == 2) ||
       (params.axis[0] == 2 && params.axis[1] == 1)))
  {
    tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
                                input()->zero_point(), input()->scale(), getTensorShape(output()),
                                getTensorData<uint8_t>(output()), output()->zero_point(),
                                output()->scale());
  }
  else if (input()->zero_point() == output()->zero_point() && input()->scale() == output()->scale())
  {
    // Same quantization on both ends: averaging raw values is exact.
    tflite::reference_ops::Mean(getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(),
                                input()->shape().num_dims(), getTensorData<uint8_t>(output()),
                                getTensorShape(output()).DimsData(), output()->shape().num_dims(),
                                axes_data, num_axes, _params.keep_dims,
                                getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
                                getTensorData<int>(temp_sum));
  }
  else
  {
    // Differing quantization: dequantize-accumulate-requantize path.
    // NOTE(review): temp_sum is read here as int; callers declare its dtype
    // differently in tests — verify the allocated byte size matches int usage.
    tflite::reference_ops::QuantizedMeanOrSum<>(
      getTensorData<uint8_t>(input()), input()->zero_point(), input()->scale(),
      getTensorShape(input()).DimsData(), input()->shape().num_dims(),
      getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(),
      getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
      _params.keep_dims, getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
      getTensorData<int>(temp_sum),
      /*compute_sum=*/false);
  }
}
+
// S16 Mean: only the 4D, keep_dims, axes {1,2} (H/W) configuration is
// implemented. Accumulates in int32, rescales by input_scale/output_scale via
// a fixed-point multiplier, divides by the element count with
// round-half-away-from-zero, and clamps to the symmetric int16 range.
void Mean::evalQuantizedS16() const
{
  const auto *input_data = getTensorData<int16_t>(input());
  auto *output_data = getTensorData<int16_t>(output());

  const Shape &input_shape = input()->shape();
  const Shape &output_shape = output()->shape();

  const auto *axes_data = getTensorData<int32_t>(axes());
  const int num_axes = axes()->shape().num_elements();

  // Symmetric clamp range: -32767..32767 (note: min is -max, not -32768).
  constexpr int32_t output_min = -std::numeric_limits<int16_t>::max();
  constexpr int32_t output_max = std::numeric_limits<int16_t>::max();

  // Defer to specialized implementation for 4D Mean across axes 1 & 2.
  if (_params.keep_dims && input_shape.num_dims() == 4 && num_axes == 2 &&
      ((axes_data[0] == 1 && axes_data[1] == 2) || (axes_data[0] == 2 && axes_data[1] == 1)))
  {
    const int32_t batches = input_shape.dim(0);
    const int32_t input_height = input_shape.dim(1);
    const int32_t input_width = input_shape.dim(2);
    const int32_t depth = input_shape.dim(3);
    assert(output_shape.num_dims() == 4);
    assert(output_shape.dim(0) == batches);
    assert(output_shape.dim(1) == 1);
    assert(output_shape.dim(2) == 1);
    assert(output_shape.dim(3) == depth);

    // Combined rescale factor between input and output quantization.
    const double real_multiplier =
      static_cast<double>(input()->scale()) / static_cast<double>(output()->scale());

    int32_t output_multiplier{};
    int output_shift{};
    quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);

    const int32_t num_elements_in_axes = input_height * input_width;

    for (int32_t batch = 0; batch < batches; ++batch)
    {
      for (int32_t c = 0; c < depth; ++c)
      {
        // Sum the H*W window for this (batch, channel) pair.
        int32_t acc = 0;
        for (int32_t in_y = 0; in_y < input_height; ++in_y)
        {
          for (int32_t in_x = 0; in_x < input_width; ++in_x)
          {
            acc += input_data[calcOffset(input_shape, batch, in_y, in_x, c)];
          }
        }
        // Rescale the raw sum into output quantization units.
        int32_t scaled_acc =
          tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
        // Divide by the number of elements rounding to the nearest integer.
        scaled_acc = scaled_acc > 0
                       ? (scaled_acc + num_elements_in_axes / 2) / num_elements_in_axes
                       : (scaled_acc - num_elements_in_axes / 2) / num_elements_in_axes;

        scaled_acc = std::max(scaled_acc, output_min);
        scaled_acc = std::min(scaled_acc, output_max);

        output_data[calcOffset(output_shape, batch, 0, 0, c)] = scaled_acc;
      }
    }
  }
  else
  {
    throw std::runtime_error("Unsupported configuration.");
  }
}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mean.h b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.h
new file mode 100644
index 000000000..ed07ae561
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MEAN_H
+#define LUCI_INTERPRETER_KERNELS_MEAN_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <memory>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Mean reduction kernel over the axes given by the `axes` tensor, with
// optional keep_dims (see ReducerParams). temp_index/resolved_axes/temp_sum
// are scratch tensors exposed as extra outputs so the runtime allocates them.
class Mean : public KernelWithParams<ReducerParams>
{
public:
  Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
       Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params);

  const Tensor *input() const { return _inputs[0]; }
  const Tensor *axes() const { return _inputs[1]; }
  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;

private:
  // Per-element-type evaluation paths.
  void evalFloat() const;
  void evalQuantized() const;
  void evalQuantizedS16() const;

private:
  // Set by configure(): true when the generic reducer (and its scratch
  // tensors) is required instead of the specialized 4D kernel.
  bool _need_temporaries = false;
};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MEAN_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mean.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.test.cpp
new file mode 100644
index 000000000..d2c00935a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.test.cpp
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Mean.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
// Test fixture: provides a fresh TestMemoryManager per test so each test owns
// its tensor allocations independently.
class MeanTest : public ::testing::Test
{
protected:
  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }

  std::unique_ptr<IMemoryManager> _memory_manager;
};
+
// Generic float path: reduce axes {0, 2} of a {4, 3, 2} tensor with keep_dims,
// so the result keeps rank 3 as {1, 3, 1}.
TEST_F(MeanTest, FloatKeepDims)
{
  std::vector<float> input_data = {1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,
                                   9.0,  10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
                                   17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};

  std::vector<int32_t> axis_data{0, 2};
  Tensor input_tensor =
    makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
  Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
  // Scratch tensors are resized by configure(); start them with empty shapes.
  Tensor temp_index(DataType::S32, Shape({}), {}, "");
  Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
  Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);

  ReducerParams params{};
  params.keep_dims = true;

  Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
              params);
  kernel.configure();
  _memory_manager->allocate_memory(temp_index);
  _memory_manager->allocate_memory(resolved_axes);
  _memory_manager->allocate_memory(temp_sum);
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  std::vector<float> ref_output_data{10.5, 12.5, 14.5};
  std::initializer_list<int32_t> ref_output_shape{1, 3, 1};
  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
+
// Specialized float path: 4D input, axes {1, 2}, keep_dims — exercises the
// fast H/W tflite kernel rather than the generic reducer.
TEST_F(MeanTest, FloatKeepDims4DMean)
{
  std::vector<float> input_data = {1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,
                                   9.0,  10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
                                   17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};

  std::vector<int32_t> axis_data{1, 2};
  Tensor input_tensor =
    makeInputTensor<DataType::FLOAT32>({2, 2, 3, 2}, input_data, _memory_manager.get());
  Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
  Tensor temp_index(DataType::S32, Shape({}), {}, "");
  Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
  Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);

  ReducerParams params{};
  params.keep_dims = true;

  Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
              params);
  kernel.configure();
  _memory_manager->allocate_memory(temp_index);
  _memory_manager->allocate_memory(resolved_axes);
  _memory_manager->allocate_memory(temp_sum);
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  std::vector<float> ref_output_data{6, 7, 18, 19};
  std::initializer_list<int32_t> ref_output_shape{2, 1, 1, 2};
  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
+
// Generic float path without keep_dims; the axis list deliberately contains
// duplicates and negative values ({1, 0, -3, -3}: -3 aliases axis 0 on a
// rank-3 tensor), so only axes 0 and 1 are actually reduced.
TEST_F(MeanTest, FloatNotKeepDims)
{
  std::vector<float> input_data = {1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,
                                   9.0,  10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
                                   17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};

  std::vector<int32_t> axis_data{1, 0, -3, -3};
  Tensor input_tensor =
    makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
  Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data, _memory_manager.get());
  Tensor temp_index(DataType::S32, Shape({}), {}, "");
  Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
  Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);

  ReducerParams params{};
  params.keep_dims = false;

  Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
              params);
  kernel.configure();
  _memory_manager->allocate_memory(temp_index);
  _memory_manager->allocate_memory(resolved_axes);
  _memory_manager->allocate_memory(temp_sum);
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  std::vector<float> ref_output_data{12, 13};
  std::initializer_list<int32_t> ref_output_shape{2};
  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
+
// U8 path with identical input/output quantization, so the kernel takes the
// requantization-free generic branch.
TEST_F(MeanTest, Uint8KeepDims)
{
  float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255);
  std::vector<float> input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);

  std::vector<int32_t> axis_data{1};
  Tensor input_tensor = makeInputTensor<DataType::U8>({3, 2}, quant_param.first, quant_param.second,
                                                      input_data, _memory_manager.get());
  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get());
  Tensor temp_index(DataType::S32, Shape({}), {}, "");
  Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
  // NOTE(review): the kernel reads temp_sum as int while it is declared U8
  // here (the other U8 test uses FLOAT32) — verify the allocated size suffices.
  Tensor temp_sum(DataType::U8, Shape({}), {}, "");
  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);

  ReducerParams params{};
  params.keep_dims = true;

  Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
              params);
  kernel.configure();
  _memory_manager->allocate_memory(temp_index);
  _memory_manager->allocate_memory(resolved_axes);
  _memory_manager->allocate_memory(temp_sum);
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  std::vector<float> ref_output_data{0.3, 0.35, 0.55};
  std::initializer_list<int32_t> ref_output_shape{3, 1};
  EXPECT_THAT(dequantizeTensorData(output_tensor),
              FloatArrayNear(ref_output_data, kQuantizedTolerance));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
+
// U8 path without keep_dims; same input/output quantization, rank drops from
// {1, 3, 2} to {1, 2} after reducing axis 1.
TEST_F(MeanTest, Uint8NotKeepDims)
{
  float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255);
  std::vector<float> input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);

  std::vector<int32_t> axis_data{1};
  Tensor input_tensor = makeInputTensor<DataType::U8>(
    {1, 3, 2}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get());
  Tensor temp_index(DataType::S32, Shape({}), {}, "");
  Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
  Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);

  ReducerParams params{};
  params.keep_dims = false;

  Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
              params);
  kernel.configure();
  _memory_manager->allocate_memory(temp_index);
  _memory_manager->allocate_memory(resolved_axes);
  _memory_manager->allocate_memory(temp_sum);
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  std::vector<float> ref_output_data{0.4, 0.4};
  std::initializer_list<int32_t> ref_output_shape{1, 2};
  EXPECT_THAT(dequantizeTensorData(output_tensor),
              FloatArrayNear(ref_output_data, kQuantizedTolerance));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
+
// S16 path: only the 4D keep_dims axes-{1,2} configuration is implemented.
// Input scale 0.25, output scale 0.2, both with zero point 0 (the S16 kernel
// requires symmetric quantization).
TEST_F(MeanTest, SInt16KeepDims4D)
{
  std::vector<float> input_data = {1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,
                                   9.0,  10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
                                   17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
  std::vector<int32_t> axes_data{1, 2};
  std::vector<float> ref_output_data{6, 7, 18, 19};

  Tensor input_tensor =
    makeInputTensor<DataType::S16>({2, 2, 3, 2}, 0.25, 0, input_data, _memory_manager.get());
  Tensor axes_tensor = makeInputTensor<DataType::S32>({2}, axes_data, _memory_manager.get());
  Tensor temp_index(DataType::S32, Shape({}), {}, "");
  Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
  Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0);

  ReducerParams params{};
  params.keep_dims = true;

  Mean kernel(&input_tensor, &axes_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
              params);
  kernel.configure();
  _memory_manager->allocate_memory(temp_index);
  _memory_manager->allocate_memory(resolved_axes);
  _memory_manager->allocate_memory(temp_sum);
  _memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 1, 2}));
  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.cpp
new file mode 100644
index 000000000..5d3dcde72
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Minimum.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Element-wise broadcastable minimum: output[i] = min(input1[i], input2[i]).
// The constructor only registers the operand/result tensors with the base Kernel.
Minimum::Minimum(const Tensor *input1, const Tensor *input2, Tensor *output)
  : Kernel({input1, input2}, {output})
{
}
+
+void Minimum::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type())
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type())
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Minimum::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalMinimum<float>();
+ break;
+ case DataType::U8:
+ evalMinimum<uint8_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> inline void Minimum::evalMinimum() const
+{
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()),
+ [](T x, T y) { return std::min(x, y); });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.h b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.h
new file mode 100644
index 000000000..5ff4035b4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MINIMUM_H
+#define LUCI_INTERPRETER_KERNELS_MINIMUM_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Kernel computing the element-wise minimum of two tensors with numpy-style
// broadcasting. Supported element types at execute() time: FLOAT32 and U8.
class Minimum : public Kernel
{
public:
  Minimum(const Tensor *input1, const Tensor *input2, Tensor *output);

  // Accessors for the tensors registered in the constructor.
  const Tensor *input1() const { return _inputs[0]; }
  const Tensor *input2() const { return _inputs[1]; }
  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;

private:
  // Typed evaluation shared by all supported element types.
  template <typename T> inline void evalMinimum() const;
};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MINIMUM_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.test.cpp
new file mode 100644
index 000000000..9a143643f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.test.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Minimum.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MinimumTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MinimumTest, Float)
+{
+ Shape input_shape{3, 1, 2};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{-1.0, 0.0, -1.0, 11.0, -3.0, -1.44};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(MinimumTest, Uint8)
+{
+ Shape input_shape{3, 1, 2};
+ std::vector<uint8_t> input_data1{1, 0, 2, 11, 2, 23};
+ std::vector<uint8_t> input_data2{0, 0, 1, 12, 255, 1};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::U8>(input_shape, input_data1, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::U8>(input_shape, input_data2, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+  std::vector<int32_t> ref_output_shape{3, 1, 2};
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({0, 0, 1, 11, 2, 1}));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.cpp
new file mode 100644
index 000000000..bae1eac70
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.cpp
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/MirrorPad.h"
+
+#include "kernels/Utils.h"
+
+#include <limits>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+MirrorPad::MirrorPad(const Tensor *input, const Tensor *paddings, Tensor *output,
+ const MirrorPadParams &params)
+ : KernelWithParams<MirrorPadParams>({input, paddings}, {output}, params)
+{
+}
+
+void MirrorPad::configure()
+{
+ const Shape &input_shape = input()->shape();
+ const int num_dims = input_shape.num_dims();
+
+ if (num_dims > 4)
+ throw std::runtime_error("Unsupported number of dimensions.");
+
+ assert(output()->element_type() == input()->element_type());
+ assert(paddings()->element_type() == DataType::S32);
+ // Paddings shape should be [N, 2].
+ assert(paddings()->shape().num_dims() == 2);
+ assert(paddings()->shape().dim(0) == num_dims);
+ assert(paddings()->shape().dim(1) == 2);
+
+ Shape output_shape(num_dims);
+ const auto *paddings_data = getTensorData<int32_t>(paddings());
+ for (int i = 0; i < num_dims; ++i)
+ {
+ const int32_t padding_before = paddings_data[i * 2];
+ const int32_t padding_after = paddings_data[i * 2 + 1];
+ assert(padding_before >= 0 && padding_after >= 0);
+ output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after;
+ }
+
+ output()->resize(output_shape);
+}
+
+template <typename T>
+inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
+ Tensor &output);
+
+void MirrorPad::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ {
+ MirrorPadImpl<float>(*input(), *paddings(), params().mode, *output());
+ break;
+ }
+ case DataType::U8:
+ {
+ assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
+ assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
+
+ MirrorPadImpl<uint8_t>(*input(), *paddings(), params().mode, *output());
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T>
+inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
+ Tensor &output)
+{
+ auto const input_dims = input.shape().num_dims();
+ auto const input_data = input.data<T>();
+ auto const paddings_data = paddings.data<int32_t>();
+ auto const output_data = output.data<T>();
+
+ auto const input_b = input_dims > 3 ? input.shape().dim(input_dims - 4) : 1;
+ auto const input_h = input_dims > 2 ? input.shape().dim(input_dims - 3) : 1;
+ auto const input_w = input_dims > 1 ? input.shape().dim(input_dims - 2) : 1;
+ auto const input_d = input.shape().dim(input_dims - 1);
+
+ auto const input_h_offset = input_d * input_w;
+ auto const input_b_offset = input_h_offset * input_h;
+
+ auto const output_b = input_dims > 3 ? output.shape().dim(input_dims - 4) : 1;
+ auto const output_h = input_dims > 2 ? output.shape().dim(input_dims - 3) : 1;
+ auto const output_w = input_dims > 1 ? output.shape().dim(input_dims - 2) : 1;
+ auto const output_d = output.shape().dim(input_dims - 1);
+
+ auto const left_b_pad = paddings_data[2 * (input_dims - 4)];
+ auto const left_h_pad = paddings_data[2 * (input_dims - 3)];
+ auto const left_w_pad = paddings_data[2 * (input_dims - 2)];
+ auto const left_d_pad = paddings_data[2 * (input_dims - 1)];
+
+ auto const right_b_pad = paddings_data[2 * (input_dims - 4) + 1];
+ auto const right_h_pad = paddings_data[2 * (input_dims - 3) + 1];
+ auto const right_w_pad = paddings_data[2 * (input_dims - 2) + 1];
+ auto const right_d_pad = paddings_data[2 * (input_dims - 1) + 1];
+
+ const auto positive_mod = [](auto a, auto b) { return (a % b + b) % b; };
+ const auto offset_index = [input_d, input_h_offset, input_b_offset](auto d, auto w, auto h,
+ auto b) {
+ return d + w * input_d + h * input_h_offset + b * input_b_offset;
+ };
+
+ const auto symmetric_dim = [&positive_mod](auto i, auto left_pad, auto input) {
+ bool reflected = (((i < left_pad ? i + 1 - input : i) - left_pad) / input & 1) == 1;
+ return positive_mod(reflected ? input + left_pad - i - 1 : i - left_pad, input);
+ };
+
+ const T *in_ptr = input_data;
+ T *out_ptr = output_data;
+
+ for (int32_t b = 0; b < output_b; ++b)
+ {
+ for (int32_t h = 0; h < output_h; ++h)
+ {
+ for (int32_t w = 0; w < output_w; ++w)
+ {
+ for (int32_t d = 0; d < output_d; ++d)
+ {
+ if (b < left_b_pad || b >= output_b - right_b_pad || //
+ h < left_h_pad || h >= output_h - right_h_pad || //
+ w < left_w_pad || w >= output_w - right_w_pad || //
+ d < left_d_pad || d >= output_d - right_d_pad)
+ {
+ if (mode == MirrorPadMode::REFLECT)
+ {
+ *out_ptr++ = input_data[offset_index(
+ positive_mod(d - left_d_pad, input_d), positive_mod(w - left_w_pad, input_w),
+ positive_mod(h - left_h_pad, input_h), positive_mod(b - left_b_pad, input_b))];
+ }
+ else
+ {
+ *out_ptr++ = input_data[offset_index(
+ symmetric_dim(d, left_d_pad, input_d), symmetric_dim(w, left_w_pad, input_w),
+ symmetric_dim(h, left_h_pad, input_h), symmetric_dim(b, left_b_pad, input_b))];
+ }
+ }
+ else
+ {
+ *out_ptr++ = *in_ptr++;
+ }
+ }
+ }
+ }
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.h b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.h
new file mode 100644
index 000000000..d3e6e858a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H
+#define LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class MirrorPad : public KernelWithParams<MirrorPadParams>
+{
+public:
+ MirrorPad(const Tensor *input, const Tensor *paddings, Tensor *output,
+ const MirrorPadParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *paddings() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp
new file mode 100644
index 000000000..740d8cb22
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/MirrorPad.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MirrorPadTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ void Execute(const Tensor &input, const Tensor &padding, Tensor &output, MirrorPadMode mode)
+ {
+ MirrorPadParams params{};
+ params.mode = mode;
+
+ MirrorPad kernel(&input, &padding, &output, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output);
+ kernel.execute();
+ }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MirrorPadTest, FloatReflect)
+{
+ Shape input_shape = {1, 2, 2, 1};
+ Shape padding_shape = {4, 2};
+
+ std::vector<float> input_data{1.0f, 2.0f, //
+ 3.0f, 4.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 2, 0, 0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT);
+
+ std::vector<float> ref_output_data{2.0f, 1.0f, 2.0f, 1.0f, 2.0f, //
+ 4.0f, 3.0f, 4.0f, 3.0f, 4.0f, //
+ 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, //
+ 4.0f, 3.0f, 4.0f, 3.0f, 4.0f, //
+ 2.0f, 1.0f, 2.0f, 1.0f, 2.0f}; //
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 5, 1};
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, FloatSymmetric)
+{
+ Shape input_shape = {1, 2, 2, 1};
+ Shape padding_shape = {4, 2};
+
+ std::vector<float> input_data{1.0f, 2.0f, //
+ 3.0f, 4.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 2, 0, 0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+ std::vector<float> ref_output_data{3.0, 3.0, 4.0, 4.0, 3.0, //
+ 1.0, 1.0, 2.0, 2.0, 1.0, //
+ 1.0, 1.0, 2.0, 2.0, 1.0, //
+ 3.0, 3.0, 4.0, 4.0, 3.0, //
+ 3.0, 3.0, 4.0, 4.0, 3.0}; //
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 5, 1};
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, FloatSymmetric2Dim)
+{
+ Shape input_shape = {3, 1};
+ Shape padding_shape = {2, 2};
+
+ std::vector<float> input_data{1.0f, 2.0f, 3.0f};
+ std::vector<int> padding_data{1, 2, 0, 0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+ std::vector<float> ref_output_data{1.0, 1.0, 2.0, 3.0, 3.0, 2.0};
+ std::initializer_list<int32_t> ref_output_shape{6, 1};
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, Uint8Reflect)
+{
+ Shape input_shape = {1, 2, 3, 1};
+ Shape padding_shape = {4, 2};
+
+ float quant_tolerance = getTolerance(0.0f, 6.0f, 255);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(0.0f, 6.0f);
+
+ std::vector<float> input_data{1.0f, 2.0f, 3.0f, //
+ 4.0f, 5.0f, 6.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 3, 0, 0};
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT);
+
+ std::vector<float> ref_output_data{
+ 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+ 6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, //
+ 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+ 6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, //
+ 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+ };
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 7, 1};
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, quant_tolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, Uint8Symmetric)
+{
+ Shape input_shape = {1, 2, 3, 1};
+ Shape padding_shape = {4, 2};
+
+ float quant_tolerance = getTolerance(0.0f, 6.0f, 255);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(0.0f, 6.0f);
+
+ std::vector<float> input_data{1.0f, 2.0f, 3.0f, //
+ 4.0f, 5.0f, 6.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 3, 0, 0};
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+ std::vector<float> ref_output_data{
+ 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+ 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, //
+ 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, //
+ 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+ 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+ };
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 7, 1};
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, quant_tolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, UnsupportedDim_NEG)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 1, 1}, {1.0f}, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>({5, 2}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT));
+}
+
+TEST_F(MirrorPadTest, InvalidInputType_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor padding_tensor = makeInputTensor<DataType::S32>({1, 2}, {0, 0}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mul.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.cpp
new file mode 100644
index 000000000..531fb4fa1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.cpp
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Mul.h"
+
+#include "kernels/BinaryOpCommon.h"
+#include "kernels/Utils.h"
+
+#include "PALMul.h"
+
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Mul::Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams &params)
+ : KernelWithParams<MulParams>({input1, input2}, {output}, params)
+{
+}
+
+void Mul::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == input1()->element_type());
+ if (input1()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
+                           input2()->zero_points().size() == 1);
+ LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
+ output()->zero_point() == 0);
+ }
+
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Mul::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Mul::evalFloat() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<float>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ luci_interpreter_pal::BroadcastMul4DSlow(
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ }
+ else
+ {
+ luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
+ getTensorShape(input2()), getTensorData<float>(input2()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ }
+}
+
+template <typename T> void Mul::evalInteger() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<T>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ luci_interpreter_pal::BroadcastMul4DSlow(
+ params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+ getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
+void Mul::evalQuantizedS16() const
+{
+ const auto input1_scale = static_cast<double>(input1()->scale());
+ const auto input2_scale = static_cast<double>(input2()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
+ const double real_multiplier = input1_scale * input2_scale / output_scale;
+
+ int32_t output_multiplier;
+ int output_shift;
+ quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ auto fn = [output_multiplier, output_shift, activation_min, activation_max](int16_t input1_val,
+ int16_t input2_val) {
+ int32_t output = static_cast<int32_t>(input1_val) * static_cast<int32_t>(input2_val);
+ output = tflite::MultiplyByQuantizedMultiplier(output, output_multiplier, output_shift);
+ output = std::max(output, activation_min);
+ output = std::min(output, activation_max);
+ return static_cast<int16_t>(output);
+ };
+
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()),
+ getTensorShape(input2()), getTensorData<int16_t>(input2()),
+ getTensorShape(output()), getTensorData<int16_t>(output()), fn);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mul.h b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.h
new file mode 100644
index 000000000..c0cf817df
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MUL_H
+#define LUCI_INTERPRETER_KERNELS_MUL_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <cstdint>
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Mul : public KernelWithParams<MulParams>
+{
+public:
+ Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams &params);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantizedS16() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MUL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mul.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.test.cpp
new file mode 100644
index 000000000..fc0e60614
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.test.cpp
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Mul.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MulTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MulTest, Float)
+{
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::vector<std::vector<float>> test_outputs = {
+ {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f,
+ 0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f,
+ 0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f},
+ {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f},
+ {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f,
+ 0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f,
+ 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f},
+ {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}};
+ std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
+ << "With shape number " << i;
+ }
+ // Re-run with exchanged inputs.
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
+ << "With shape number " << i;
+ }
+}
+
+template <loco::DataType DType> void checkInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+
+ dtype max_value = std::numeric_limits<dtype>::max();
+ dtype res_max = max_value - max_value % 10;
+
+ std::vector<std::vector<dtype>> test_outputs = {
+ {8, 0, 20, 0, 4, 30, //
+ 16, 0, 40, 3, 8, 0, //
+ 0, 0, 0, 6, 0, 0, //
+ 4, 0, 10, 9, 2, 0, //
+ 40, 0, 100, 0, 20, 150, //
+ 28, 0, 70, 0, 14, res_max},
+ {8, 0, 40, 3, 0, 0, 4, 0, 100, 0, 14, res_max},
+ {8, 12, 0, 0, 20, 30, 16, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0,
+ 0, 0, 9, 2, 0, 10, 0, 0, 0, 20, 30, 100, 150, 0, 0, 14, max_value / 10 * 2,
+ 70, res_max},
+ {8, 12, 0, 0, 0, 0, 0, 9, 20, 30, 70, res_max}};
+ std::vector<dtype> input1_data{2, 3, 4, -1, -3, -2, 1, -3, 10, 15, 7, max_value / 10};
+ std::vector<dtype> input2_data{4, 0, 10, -3, 2, 10};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+ // Re-run with exchanged inputs.
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+}
+
+TEST_F(MulTest, SInt64) // Broadcasting Mul + RELU over 64-bit signed integer tensors.
+{
+  checkInteger<loco::DataType::S64>(_memory_manager.get());
+  SUCCEED();
+}
+
+TEST_F(MulTest, SInt32) // Broadcasting Mul + RELU over 32-bit signed integer tensors.
+{
+  checkInteger<loco::DataType::S32>(_memory_manager.get());
+  SUCCEED();
+}
+
+TEST_F(MulTest, SInt16) // Quantized (S16) Mul with broadcasting; RELU zeroes negative products.
+{
+  Shape base_shape = {2, 3, 1, 2};
+  std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; // each broadcasts against base_shape
+  std::vector<std::vector<int32_t>> ref_output_shapes{
+    {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+
+  std::vector<float> input1_data{-0.3f, 2.3f, 0.9f,  0.5f, 0.8f, -1.1f,
+                                 1.2f,  2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+  std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+  std::vector<std::vector<float>> ref_outputs = {
+    {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f,
+     0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f,
+     0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f},
+    {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f},
+    {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f,
+     0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f,
+     0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f},
+    {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}};
+  for (size_t i = 0; i < test_shapes.size(); ++i)
+  {
+    Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data,
+                                                          _memory_manager.get());
+    Tensor input2_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0,
+                                                          input2_data, _memory_manager.get());
+    Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0);
+    const float tolerance = output_tensor.scale() * 2; // allow up to two quantization steps of error
+
+    MulParams params{};
+    params.activation = Activation::RELU;
+
+    Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+    kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
+    kernel.execute();
+
+    EXPECT_THAT(extractTensorShape(output_tensor),
+                ::testing::ElementsAreArray(ref_output_shapes[i]))
+      << "With shape number " << i;
+    EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
+      << "With shape number " << i;
+  }
+  // Re-run with exchanged inputs and different scales.
+  for (size_t i = 0; i < test_shapes.size(); ++i)
+  {
+    Tensor input1_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0,
+                                                          input2_data, _memory_manager.get());
+    Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data,
+                                                          _memory_manager.get());
+    Tensor output_tensor = makeOutputTensor(DataType::S16, 3.0 / 32767, 0);
+    const float tolerance = output_tensor.scale() * 2; // allow up to two quantization steps of error
+
+    MulParams params{};
+    params.activation = Activation::RELU;
+
+    Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+    kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
+    kernel.execute();
+
+    EXPECT_THAT(extractTensorShape(output_tensor),
+                ::testing::ElementsAreArray(ref_output_shapes[i]))
+      << "With shape number " << i;
+    EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
+      << "With shape number " << i;
+  }
+}
+
+TEST_F(MulTest, Input_Output_Type_NEG) // configure() must reject inputs of different element types.
+{
+  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  MulParams params{};
+  params.activation = Activation::RELU;
+
+  Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(MulTest, Invalid_Output_Type_NEG) // configure() must reject an output type differing from the inputs'.
+{
+  Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  MulParams params{};
+  params.activation = Activation::RELU;
+
+  Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(MulTest, Invalid_Input_Type_NEG) // U64 passes configure() but execute() must throw (unsupported type).
+{
+  Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U64);
+
+  MulParams params{};
+  params.activation = Activation::RELU;
+
+  Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(MulTest, Invalid_Quantization_NEG) // S16 tensors built without scale/zero-point must be rejected.
+{
+  Tensor input1_tensor = makeInputTensor<DataType::S16>({1}, {1}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S16>({1}, {2}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+  MulParams params{};
+  params.activation = Activation::NONE;
+
+  Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Neg.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.cpp
new file mode 100644
index 000000000..c6fe08a9e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Neg.h"
+#include "kernels/Utils.h"
+
+#include "PALNeg.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Neg::Neg(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} // single input, single output
+
+void Neg::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); // element-wise op: types must match
+
+  output()->resize(input()->shape()); // output shape mirrors the input exactly
+}
+
+void Neg::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type."); // only FLOAT32 is implemented so far
+  }
+}
+
+void Neg::evalFloat() const // delegates to the platform abstraction layer (PALNeg.h) routine
+{
+  luci_interpreter_pal::Negate(getTensorShape(input()), getTensorData<float>(input()),
+                               getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Neg.h b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.h
new file mode 100644
index 000000000..69fa1a18e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_NEG_H
+#define LUCI_INTERPRETER_KERNELS_NEG_H
+
+#include "core/Kernel.h"
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Neg : public Kernel // Element-wise Neg kernel; FLOAT32 only (see Neg.cpp).
+{
+public:
+  Neg(const Tensor *input, Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override; // checks types match and resizes the output
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_NEG_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Neg.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.test.cpp
new file mode 100644
index 000000000..8b2bc1a82
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.test.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Neg.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<T> input_data, std::initializer_list<T> output_data) // runs Neg and checks data + shape
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  constexpr DataType element_type = getElementType<T>(); // map C++ type T to its loco DataType
+  Tensor input_tensor =
+    makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(element_type);
+
+  Neg kernel(&input_tensor, &output_tensor);
+
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor); // allocation must happen after configure() resizes the output
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(NegTest, FloatSimple) // sign of every element is flipped; shape is preserved
+{
+  Check<float>(/*input_shape=*/{2, 3},
+               /*output_shape=*/{2, 3},
+               /*input_data=*/
+               {
+                 0.0f, 1.0f, 3.0f,  // Row 1
+                 1.0f, -1.0f, -2.0f, // Row 2
+               },
+               /*output_data=*/
+               {
+                 0.0f, -1.0f, -3.0f, // Row 1
+                 -1.0f, 1.0f, 2.0f,  // Row 2
+               });
+
+  SUCCEED();
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.cpp
new file mode 100644
index 000000000..54e5eee34
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/NotEqual.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+NotEqual::NotEqual(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {} // comparison kernel: two inputs, bool output
+
+void NotEqual::configure()
+{
+  LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); // both operands must share a type
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); // comparisons always yield BOOL
+
+  if (x()->element_type() == DataType::U8)
+  {
+    // Precompute fixed-point multipliers/shifts used to rescale quantized operands in evalQuantized().
+    quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+    quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+  }
+  output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); // throws if shapes are not broadcastable
+}
+
+void NotEqual::execute() const
+{
+  switch (x()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type."); // other dtypes are rejected at runtime
+  }
+}
+
+void NotEqual::evalFloat() const
+{
+  const auto x_data = getTensorData<float>(x());
+  const auto y_data = getTensorData<float>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape(); // pick the (slower) broadcasting path only when needed
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowNotEqual(op_params, getTensorShape(x()), x_data,
+                                                   getTensorShape(y()), y_data,
+                                                   getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::NotEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+                                    y_data, getTensorShape(output()), output_data);
+  }
+}
+
+template <typename T> void NotEqual::evalInteger() const // T is int32_t or int64_t (see execute())
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowNotEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                                            getTensorShape(y()), y_data,
+                                                            getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::NotEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                             getTensorShape(y()), y_data, getTensorShape(output()),
+                                             output_data);
+  }
+}
+
+void NotEqual::evalQuantized() const // compares U8 operands after rescaling them to a common fixed-point domain
+{
+  const auto x_data = getTensorData<uint8_t>(x());
+  const auto y_data = getTensorData<uint8_t>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.left_shift = 8; // extra precision bits applied before rescaling (TFLite convention)
+  op_params.input1_offset = -x()->zero_point(); // Note the '-'
+  op_params.input1_shift = _x_shift;
+  op_params.input1_multiplier = _x_multiplier;
+  op_params.input2_offset = -y()->zero_point(); // Note the '-'
+  op_params.input2_shift = _y_shift;
+  op_params.input2_multiplier = _y_multiplier;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowNotEqualWithScaling(
+      op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      output_data);
+  }
+  else
+  {
+    tflite::reference_ops::NotEqualWithScaling(op_params, getTensorShape(x()), x_data,
+                                               getTensorShape(y()), y_data,
+                                               getTensorShape(output()), output_data);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.h b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.h
new file mode 100644
index 000000000..d2aafe893
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H
+#define LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class NotEqual : public Kernel // Element-wise x != y with broadcasting; supports FLOAT32, S32, S64, U8.
+{
+public:
+  NotEqual(const Tensor *x, const Tensor *y, Tensor *output);
+
+  const Tensor *x() const { return _inputs[0]; }
+  const Tensor *y() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; } // always DataType::BOOL
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+  template <typename T> void evalInteger() const;
+  void evalQuantized() const;
+
+private:
+  // Fixed-point rescale parameters for U8 inputs, computed once in configure().
+  int32_t _x_multiplier = 0;
+  int _x_shift = 0;
+  int32_t _y_multiplier = 0;
+  int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.test.cpp
new file mode 100644
index 000000000..45bf4022a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.test.cpp
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/NotEqual.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class NotEqualTest : public ::testing::Test // fixture: provides a fresh TestMemoryManager per test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(NotEqualTest, FloatSimple) // same-shape float comparison, no broadcasting
+{
+  std::vector<float> x_data{
+    0.5, 0.7, 0.9, // Row 1
+    1,   0,   -1,  // Row 2
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.7, 0.5, // Row 1
+    -1,  0,   1,   // Row 2
+  };
+
+  std::vector<bool> ref_output_data{
+    true, false, true, // Row 1
+    true, false, true, // Row 2
+  };
+
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(NotEqualTest, FloatBroadcast) // {1,3} y broadcast across {4,3} x (name typo "Broardcast" fixed)
+{
+  std::vector<float> x_data{
+    0.5, 0.7, 0.9, // Row 1
+    1,   0,   -1,  // Row 2
+    -1,  0,   1,   // Row 3
+    0.9, 0.7, 0.5, // Row 4
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.7, 0.5, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    true,  false, true,  // Row 1
+    true,  true,  true,  // Row 2
+    true,  true,  true,  // Row 3
+    false, false, false, // Row 4
+  };
+
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager) // same-shape integer NotEqual, incl. type extremes
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type; // the C++ type backing DType
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{min_value, 2, max_value};
+
+  std::vector<dtype> y_data{min_value, -2, max_value};
+
+  std::vector<bool> ref_output_data{false, true, false}; // only the middle pair (2 vs -2) differs
+
+  Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager) // {3} y broadcast across {4,3} x
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type; // the C++ type backing DType
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{
+    min_value, 2,  3,         // Row 1
+    4,         5,  max_value, // Row 2
+    -1,        -2, -3,        // Row 3
+    min_value, -2, max_value, // Row 4
+  };
+
+  std::vector<dtype> y_data{
+    min_value, -2, max_value, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    false, true,  true,  // Row 1
+    true,  true,  false, // Row 2
+    true,  false, true,  // Row 3
+    false, false, false, // Row 4
+  };
+
+  Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(NotEqualTest, Int32) // S32: simple and broadcast paths
+{
+  checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+  SUCCEED();
+}
+
+TEST_F(NotEqualTest, Int64) // S64: simple and broadcast paths
+{
+  checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+  SUCCEED();
+}
+
+// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+TEST_F(NotEqualTest, Uint8Quantized) // U8 inputs with different scales must still compare correctly
+{
+  std::vector<float> x_data{
+    0.5, 0.5, 0.7,  0.9, // Row 1
+    1,   0,   0.05, -1,  // Row 2
+  };
+
+  std::vector<float> y_data{
+    0.9, 0.5, 0.55, 0.5, // Row 1
+    -1,  0,   0.05, 1,   // Row 2
+  };
+
+  std::vector<bool> ref_output_data{
+    true, false, true,  true, // Row 1
+    true, false, false, true, // Row 2
+  };
+
+  std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+
+  std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2); // deliberately different scale than x
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(NotEqualTest, Uint8QuantizedBroadcast) // {1,1,4,1} y broadcast across {1,4,4,1} x, shared quant params
+{
+  std::vector<float> x_data{
+    0.4,  -0.8, 0.7,  0.3, // Row 1
+    -0.5, 0.1,  0,    0.5, // Row 2
+    1,    0,    0.05, -1,  // Row 3
+    -1,   0.05, 0,    1,   // Row 4
+  };
+
+  std::vector<float> y_data{
+    -1, 0.05, 0, 1, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    true,  true,  true,  true,  // Row 1
+    true,  true,  false, true,  // Row 2
+    true,  true,  true,  true,  // Row 3
+    false, false, false, false, // Row 4
+  };
+
+  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 4, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 4, 1}));
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(NotEqualTest, Input_Type_Mismatch_NEG) // operands of different element types must be rejected
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(NotEqualTest, Input_Output_Type_NEG) // a non-BOOL output must be rejected
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(NotEqualTest, Float_Broadcast_NEG) // {2} vs {3} are not broadcastable
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(NotEqualTest, Int32_Broadcast_NEG) // {2} vs {3} are not broadcastable
+{
+  Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(NotEqualTest, Int64_Broadcast_NEG) // {2} vs {3} are not broadcastable
+{
+  Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  ASSERT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.cpp
new file mode 100644
index 000000000..4d3e5f2ef
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/OneHot.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+template <typename T>
+void OneHotComputeImpl(const Tensor *indices_tensor, const Tensor *on_value_tensor,
+                       const Tensor *off_value_tensor, int32_t depth, int32_t axis,
+                       Tensor *output_tensor) // fills output with on/off values per the one-hot rule
+{
+  // define input shape and correct axis
+  auto const &input_shape = indices_tensor->shape();
+  axis = axis == -1 ? input_shape.num_dims() : axis; // -1 means "append the new axis last"
+
+  // TODO support other integer input types
+  auto const *indices = getTensorData<int32_t>(indices_tensor);
+  auto const on_value = getTensorData<T>(on_value_tensor)[0]; // scalar tensors (checked in configure)
+  auto const off_value = getTensorData<T>(off_value_tensor)[0];
+  auto *output = getTensorData<T>(output_tensor);
+
+  // prefix_dim_size == # of elements before the axis
+  // depth == # of elements per axis
+  // suffix_dim_size == # of elements after the axis
+  auto prefix_dim_size = 1;
+  for (int32_t i = 0; i < axis; ++i)
+  {
+    prefix_dim_size *= input_shape.dim(i);
+  }
+  assert(prefix_dim_size > 0);
+  auto const suffix_dim_size = input_shape.num_elements() / prefix_dim_size;
+
+  // View the indices as a matrix of size:
+  //   prefix_dim_size x suffix_dim_size
+  // View the output as a matrix of size:
+  //   prefix_dim_size x depth x suffix_dim_size
+  // Then the output is:
+  //   output(i, j, k) == (indices(i, k) == j) ? on : off
+  for (int32_t i = 0; i < prefix_dim_size; ++i)
+    for (int32_t j = 0; j < depth; ++j)
+      for (int32_t k = 0; k < suffix_dim_size; ++k, ++output)
+        *output = indices[i * suffix_dim_size + k] == j ? on_value : off_value;
+}
+
+} // namespace
+
+OneHot::OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value,
+               const Tensor *off_value, Tensor *output, const OneHotParams &params)
+  : KernelWithParams<OneHotParams>({indices, depth, on_value, off_value}, {output}, params)
+{
+  // Do nothing
+}
+
+void OneHot::configure()
+{
+  // check types
+  LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32); // only S32 indices supported (see TODO above)
+  LUCI_INTERPRETER_CHECK(depth()->element_type() == DataType::S32);
+  LUCI_INTERPRETER_CHECK(on_value()->element_type() == off_value()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == on_value()->element_type());
+
+  // check shape dependent parameters
+  LUCI_INTERPRETER_CHECK(on_value()->shape().num_elements() == 1); // on/off/depth must be scalars
+  LUCI_INTERPRETER_CHECK(off_value()->shape().num_elements() == 1);
+  LUCI_INTERPRETER_CHECK(depth()->shape().num_elements() == 1);
+  LUCI_INTERPRETER_CHECK(params().axis >= -1 && params().axis <= indices()->shape().num_dims());
+
+  // define parameters that affect the output shape
+  auto const depth_value = getTensorData<int32_t>(depth())[0];
+  auto const &input_shape = indices()->shape();
+  auto const input_dims = input_shape.num_dims();
+  auto const axis = params().axis == -1 ? input_dims : params().axis; // normalize -1 to "last axis"
+
+  // define output shape
+  Shape output_shape(input_shape.num_dims() + 1); // output has one extra axis of size depth
+  {
+    for (int32_t d = 0; d < axis; ++d)
+      output_shape.dim(d) = input_shape.dim(d);
+
+    output_shape.dim(axis) = depth_value;
+
+    for (int32_t d = axis + 1; d < output_shape.num_dims(); ++d)
+      output_shape.dim(d) = input_shape.dim(d - 1);
+  }
+
+  // reshape output
+  output()->resize(output_shape);
+}
+
+void OneHot::execute() const
+{
+  auto const depth_value = getTensorData<int32_t>(depth())[0];
+  auto const axis = params().axis; // may be -1; OneHotComputeImpl normalizes it
+
+  switch (output()->element_type())
+  {
+    case loco::DataType::FLOAT32:
+      OneHotComputeImpl<float>(indices(), on_value(), off_value(), depth_value, axis, output());
+      break;
+    case loco::DataType::U8:
+      OneHotComputeImpl<uint8_t>(indices(), on_value(), off_value(), depth_value, axis, output());
+      break;
+    case loco::DataType::S16:
+      OneHotComputeImpl<int16_t>(indices(), on_value(), off_value(), depth_value, axis, output());
+      break;
+    default:
+      // TODO Support other data types
+      throw std::runtime_error("Not supported, yet!");
+      break;
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.h b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.h
new file mode 100644
index 000000000..572f857ae
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ONEHOT_H
+#define LUCI_INTERPRETER_KERNELS_ONEHOT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// OneHot kernel: expands integer indices into a one-hot encoded tensor,
// inserting a new dimension of size `depth` at `params.axis`
// (axis == -1 means "append as the last dimension").
class OneHot : public KernelWithParams<OneHotParams>
{
public:
  OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value,
         const Tensor *off_value, Tensor *output, const OneHotParams &params);

  // Input accessors; order matches the constructor parameters.
  const Tensor *indices() const { return _inputs[0]; }
  const Tensor *depth() const { return _inputs[1]; }
  const Tensor *on_value() const { return _inputs[2]; }
  const Tensor *off_value() const { return _inputs[3]; }

  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;
};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ONEHOT_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.test.cpp
new file mode 100644
index 000000000..45b6968fa
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.test.cpp
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/OneHot.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T1, typename T2>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T1> input_data, std::initializer_list<int32_t> depth_data,
+ std::initializer_list<T2> on_value_data, std::initializer_list<T2> off_value_data,
+ int32_t axis, std::initializer_list<T2> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr auto input_type = getElementType<T1>();
+ constexpr auto output_type = getElementType<T2>();
+
+ Tensor input_tensor = makeInputTensor<input_type>(input_shape, input_data, memory_manager.get());
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, depth_data, memory_manager.get());
+ Tensor on_value_tensor = makeInputTensor<output_type>({}, on_value_data, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<output_type>({}, off_value_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(output_type);
+
+ OneHotParams params{};
+ params.axis = axis;
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+ EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
+}
+
// Typed test fixture: BasicPattern below is instantiated once per output
// value type listed in DataTypes.
template <typename T> class OneHotTest : public ::testing::Test
{
};

using DataTypes = ::testing::Types<float, uint8_t, int16_t>;
TYPED_TEST_SUITE(OneHotTest, DataTypes);
+
// Verifies OneHot for a 2x3 index tensor with depth 4 at every supported
// axis position (0, 1 and -1). Indices >= depth (5, 7) never match any
// depth slot, so their rows/columns contain only off_value.
TYPED_TEST(OneHotTest, BasicPattern)
{
  // axis 0
  Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{4, 2, 3},
                            /*input_data=*/
                            {
                              0, 3, 5, //
                              7, 3, 0, //
                            },
                            /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
                            /*axis=*/0,
                            /*output_data=*/
                            {
                              1, 0, 0, //
                              0, 0, 1, //

                              0, 0, 0, //
                              0, 0, 0, //

                              0, 0, 0, //
                              0, 0, 0, //

                              0, 1, 0, //
                              0, 1, 0, //
                            });
  // axis 1
  Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 4, 3},
                            /*input_data=*/
                            {
                              0, 3, 5, //
                              7, 3, 0, //
                            },
                            /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
                            /*axis=*/1,
                            /*output_data=*/
                            {
                              1, 0, 0, //
                              0, 0, 0, //
                              0, 0, 0, //
                              0, 1, 0, //

                              0, 0, 1, //
                              0, 0, 0, //
                              0, 0, 0, //
                              0, 1, 0, //
                            });
  // axis -1
  Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3, 4},
                            /*input_data=*/
                            {
                              0, 3, 5, //
                              7, 3, 0, //
                            },
                            /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
                            /*axis=*/-1,
                            /*output_data=*/
                            {
                              1, 0, 0, 0, //
                              0, 0, 0, 1, //
                              0, 0, 0, 0, //

                              0, 0, 0, 0, //
                              0, 0, 0, 1, //
                              1, 0, 0, 0, //
                            });
}
+
+TEST(OneHotTest, UnsupportedInputType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ // input type should be integer
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {0}, memory_manager.get());
+
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+ Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ OneHotParams params = {-1};
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(OneHotTest, OutputTypeMismatch_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S32>({1}, {0}, memory_manager.get());
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+
+ // type of on_value, off_value and output_tensor should be same
+ Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+ OneHotParams params = {-1};
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(OneHotTest, InvalidAxis_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S32>({1}, {0}, memory_manager.get());
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+ Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ // axis should be in [-1, input_shape.rank]
+ OneHotParams params = {-2};
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.cpp
new file mode 100644
index 000000000..5a6b05c3a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.cpp
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/PRelu.h"
+
+#include "kernels/BinaryOpCommon.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/binary_function.h>
+#include <tensorflow/lite/kernels/internal/reference/prelu.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
// Two inputs (input, per-element or broadcastable alpha), one output.
PRelu::PRelu(const Tensor *input, const Tensor *alpha, Tensor *output)
  : Kernel({input, alpha}, {output})
{
}
+
PRelu::~PRelu()
{
  // Destructor declared to delete vector of alpha quantized data properly
  // (ChannelQuantMultipliers is only forward-declared in PRelu.h).
}
+
// Validates element types and quantization parameters, precomputes the
// fixed-point multipliers used by the quantized kernels, and resizes the
// output to the broadcast of input and alpha shapes.
void PRelu::configure()
{
  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
  LUCI_INTERPRETER_CHECK(alpha()->element_type() == output()->element_type());
  // input/output must be layer-wise quantized (at most one scale).
  LUCI_INTERPRETER_CHECK(input()->scales().size() <= 1);
  LUCI_INTERPRETER_CHECK(output()->scales().size() <= 1);

  if (input()->element_type() == DataType::U8)
  {
    LUCI_INTERPRETER_CHECK(alpha()->scales().size() <= 1); // remove when CWQ kernel arrives
    // Single multiplier for the negative (alpha) branch...
    _alpha_multipliers.resize(1);
    double alpha_multiplier = input()->scale() * alpha()->scale() / output()->scale();
    quantizeMultiplier(alpha_multiplier, &_alpha_multipliers[0].multiplier,
                       &_alpha_multipliers[0].shift);
    // ...and an identity multiplier for the non-negative branch (rescales
    // input to output quantization).
    double identity_multiplier = input()->scale() / output()->scale();
    quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
  }
  else if (input()->element_type() == DataType::S16)
  {
    // Common check for correctness of quant params: S16 is symmetric,
    // so all zero points must be 0.
    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
    for (size_t channel = 0; channel < alpha()->zero_points().size(); ++channel)
    {
      LUCI_INTERPRETER_CHECK(alpha()->zero_points()[channel] == 0);
    }
    // PRelu specific checks for CWQ: alpha is channel-wise quantized over
    // its last dimension, one scale per channel, and covers exactly the
    // input's last dimension.
    LUCI_INTERPRETER_CHECK(alpha()->quantized_dimension() == alpha()->shape().num_dims() - 1);
    LUCI_INTERPRETER_CHECK(static_cast<int32_t>(alpha()->scales().size()) ==
                           alpha()->shape().dim(alpha()->quantized_dimension()));
    LUCI_INTERPRETER_CHECK(alpha()->shape().num_elements() ==
                           input()->shape().dim(input()->shape().num_dims() - 1));

    // all dimension of alpha except last one should be size 1
    for (int dim = 0; dim < alpha()->shape().num_dims() - 1; ++dim)
    {
      LUCI_INTERPRETER_CHECK(alpha()->shape().dim(dim) == 1);
    }

    // One fixed-point multiplier per alpha channel for the negative branch.
    std::vector<double> real_multipliers =
      getQuantizedConvolutionMultiplers(input()->scale(), alpha()->scales(), output()->scale());

    _alpha_multipliers = quantizeMultipliers(real_multipliers);

    double identity_multiplier = input()->scale() / output()->scale();
    quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
  }
  output()->resize(calculateShapeForBroadcast(input()->shape(), alpha()->shape()));
}
+
+void PRelu::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void PRelu::evalFloat() const
+{
+ const auto input_data = getTensorData<float>(input());
+ const auto alpha_data = getTensorData<float>(alpha());
+ const auto size = getTensorShape(input()).FlatSize();
+ auto output_data = getTensorData<float>(output());
+
+ auto PReluFunc = [](float input, float alpha) { return input >= 0.0 ? input : input * alpha; };
+
+ if (input()->shape() != alpha()->shape())
+ {
+ tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
+ getTensorShape(input()), getTensorData<float>(input()), getTensorShape(alpha()),
+ getTensorData<float>(alpha()), getTensorShape(output()), getTensorData<float>(output()),
+ PReluFunc);
+ }
+ else
+ {
+ for (auto i = decltype(size){0}; i < size; ++i)
+ {
+ if (input_data[i] >= 0)
+ output_data[i] = input_data[i];
+ else
+ output_data[i] = input_data[i] * alpha_data[i];
+ }
+ }
+}
+
// U8 PReLU via the TFLite reference kernels, using the multipliers
// precomputed in configure(): multiplier_1 rescales the non-negative branch,
// multiplier_2 rescales the alpha (negative) branch.
void PRelu::evalQuantized() const
{
  tflite::PreluParams op_params{};

  op_params.input_offset = -input()->zero_point(); // Note the '-'.
  op_params.alpha_offset = -alpha()->zero_point(); // Note the '-'.
  op_params.output_offset = output()->zero_point();
  op_params.output_shift_1 = _output_shift_identity;
  op_params.output_multiplier_1 = _output_multiplier_identity;
  op_params.output_shift_2 = _alpha_multipliers[0].shift;
  op_params.output_multiplier_2 = _alpha_multipliers[0].multiplier;

  // Pick the broadcasting variant only when the shapes differ.
  if (input()->shape() != alpha()->shape())
  {
    tflite::reference_ops::BroadcastPrelu4DSlow(
      op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()),
      getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output()));
  }
  else
  {
    tflite::reference_ops::Prelu<uint8_t>(
      op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()),
      getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output()));
  }
}
+
// Single-element S16 PReLU: rescale with the identity multiplier for
// non-negative inputs, or with the per-channel alpha multiplier applied to
// the int32 product input * alpha for negative inputs; clamp to int16 range.
static inline int16_t evalElemS16PRelu(int16_t input_val, int16_t alpha_val,
                                       const ChannelQuantMultipliers &identity_mult,
                                       const ChannelQuantMultipliers &alpha_mult)
{
  constexpr int32_t quantized_min = std::numeric_limits<int16_t>::min();
  constexpr int32_t quantized_max = std::numeric_limits<int16_t>::max();

  // input_val * alpha_val is an int16 x int16 product, so it fits in int32.
  const int32_t output_val =
    input_val >= 0
      ? tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val),
                                              identity_mult.multiplier, identity_mult.shift)
      : tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val * alpha_val),
                                              alpha_mult.multiplier, alpha_mult.shift);
  const int32_t clamped_output = std::min(quantized_max, std::max(quantized_min, output_val));
  return clamped_output;
}
+
// S16 PReLU with channel-wise quantized alpha. Iterates the input as
// (outer dims) x (last dim), where the last dimension is the quantized
// channel dimension validated in configure().
void PRelu::evalQuantizedS16() const
{
  // Note that this kernel assumes alpha is CWQ
  tflite::RuntimeShape input_shape = getTensorShape(input());
  const int16_t *input_data = input()->data<int16_t>();
  const int16_t *alpha_data = alpha()->data<int16_t>();
  int16_t *output_data = output()->data<int16_t>();

  // Multiplier for the non-negative branch is shared by all channels.
  const ChannelQuantMultipliers pos_mult{_output_shift_identity, _output_multiplier_identity};

  const int last_dim = input()->shape().num_dims() - 1;

  // Flatten all dimensions before the channel dimension.
  int32_t outer_dims_size = 1;
  for (int i = 0; i < last_dim; ++i)
    outer_dims_size *= input_shape.Dims(i);
  int32_t quant_dim_size = input_shape.Dims(last_dim);

  for (int32_t outer_dims = 0; outer_dims < outer_dims_size; ++outer_dims)
    for (int32_t quant_channel = 0; quant_channel < quant_dim_size; ++quant_channel)
    {
      // Negative-branch multiplier is selected per channel.
      const ChannelQuantMultipliers &neg_mult = _alpha_multipliers[quant_channel];
      size_t offset = static_cast<size_t>(outer_dims) * static_cast<size_t>(quant_dim_size);
      offset += quant_channel;

      output_data[offset] =
        evalElemS16PRelu(input_data[offset], alpha_data[quant_channel], pos_mult, neg_mult);
    }
}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.h b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.h
new file mode 100644
index 000000000..f7735d418
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_PRELU_H
+#define LUCI_INTERPRETER_KERNELS_PRELU_H
+
+#include "core/Kernel.h"
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ChannelQuantMultipliers;
+
// PReLU kernel: output = input where input >= 0, alpha * input otherwise
// (see the eval* implementations in PRelu.cpp).
class PRelu : public Kernel
{
public:
  PRelu(const Tensor *input, const Tensor *alpha, Tensor *output);

  // Out-of-line destructor so the vector of ChannelQuantMultipliers is
  // destroyed where the type is complete (only forward-declared here).
  ~PRelu();

  const Tensor *input() const { return _inputs[0]; }
  const Tensor *alpha() const { return _inputs[1]; }
  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;

private:
  void evalFloat() const;
  void evalQuantized() const;
  void evalQuantizedS16() const;

private:
  // Fixed-point multipliers for the alpha (negative) branch: one entry for
  // U8, one per channel for S16 CWQ. Filled in configure().
  std::vector<ChannelQuantMultipliers> _alpha_multipliers;
  // TODO merge this into one ChannelQuantMultiplier object
  int32_t _output_multiplier_identity = 0;
  int _output_shift_identity = 0;
};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_PRELU_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.test.cpp
new file mode 100644
index 000000000..6d97382de
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.test.cpp
@@ -0,0 +1,397 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/PRelu.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> alpha_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data,
+ std::initializer_list<T> alpha_data, std::initializer_list<T> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<element_type>(alpha_shape, alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
// Elementwise (same-shape) float PReLU: positives pass through, negatives
// are scaled by the matching alpha element.
TEST(PReluTest, FloatSimple)
{
  Check<float>(/*input_shape=*/{2, 3}, /*alpha_shape=*/{2, 3},
               /*output_shape=*/{2, 3},
               /*input_data=*/
               {
                 0.0f, 1.0f, 3.0f,   // Row 1
                 1.0f, -1.0f, -2.0f, // Row 2
               },
               /*alpha_data=*/
               {
                 0.0f, 0.5f, 0.1f, // Row 1
                 0.0f, 0.5f, 0.1f, // Row 2
               },
               /*output_data=*/
               {
                 0.0f, 1.0f, 3.0f,   // Row 1
                 1.0f, -0.5f, -0.2f, // Row 2
               });

  SUCCEED();
}
+
// Float PReLU with alpha broadcast over the input's leading dimensions.
TEST(PReluTest, FloatBroadcast)
{
  Check<float>(/*input_shape=*/{1, 2, 2, 3}, /*alpha_shape=*/{1, 1, 3},
               /*output_shape=*/{1, 2, 2, 3},
               /*input_data=*/
               {
                 0.0f, 0.0f, 0.0f,    // Row 1, Column 1
                 1.0f, 1.0f, 1.0f,    // Row 1, Column 2
                 -1.0f, -1.0f, -1.0f, // Row 2, Column 1
                 -2.0f, -2.0f, -2.0f, // Row 2, Column 2
               },
               /*alpha_data=*/
               {0.0f, 1.0f, 2.0f},
               /*output_data=*/
               {
                 0.0f, 0.0f, 0.0f,   // Row 1, Column 1
                 1.0f, 1.0f, 1.0f,   // Row 1, Column 2
                 0.0f, -1.0f, -2.0f, // Row 2, Column 1
                 0.0f, -2.0f, -4.0f, // Row 2, Column 2
               });

  SUCCEED();
}
+
// Maximum quantization error for a uint8 tensor covering [min, max]:
// one quantization step, i.e. the full range divided by 255 levels.
float GetTolerance(float min, float max)
{
  const float range = max - min;
  return range / 255.0;
}
+
// Same-shape U8 PReLU: dequantized output must match the float reference
// within one quantization step of the [-1, 1] range.
TEST(PReluTest, Uint8Simple)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  std::vector<float> input_data{-0.8f, 0.2f, 0.9f, 0.7f, 0.1f, -0.4f};
  std::vector<float> alpha_data{0.5f, 0.5f, 0.5f, 0.25f, 1.0f, 0.25f};
  std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, 0.7f, 0.1f, -0.1f};

  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
  // Input, alpha and output all share the same quantization parameters.
  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);

  Tensor input_tensor = makeInputTensor<DataType::U8>(
    {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
  Tensor alpha_tensor = makeInputTensor<DataType::U8>(
    {1, 2, 3, 1}, quant_param.first, quant_param.second, alpha_data, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  kernel.configure();
  memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(dequantizeTensorData(output_tensor),
              FloatArrayNear(ref_output_data, kQuantizedTolerance));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 3, 1}));

  SUCCEED();
}
+
// Broadcast U8 PReLU; checks both the dequantized values and the exact raw
// quantized bytes produced by the kernel.
TEST(PReluTest, Uint8Broadcast)
{
  std::vector<float> input_data{
    0.0f, 0.0f, 0.0f,       // Row 1, Column 1
    0.5f, 0.5f, 0.5f,       // Row 1, Column 2
    -1.0f, -1.0f, -1.0f,    // Row 2, Column 1
    -0.25f, -0.25f, -0.25f, // Row 2, Column 2
  };
  std::vector<float> alpha_data{0.0f, 0.5f, -0.5f};
  std::vector<float> ref_output_data{
    0.0f, 0.0f, 0.0f,     // Row 1, Column 1
    0.5f, 0.5f, 0.5f,     // Row 1, Column 2
    0.0f, -0.5f, 0.5f,    // Row 2, Column 1
    0.0f, -0.125f, 0.125f // Row 2, Column 2
  };
  std::vector<float> ref_quant_output_data{
    128, 128, 128, // Row 1, Column 1
    192, 192, 192, // Row 1, Column 2
    128, 64, 192,  // Row 2, Column 1
    128, 112, 144  // Row 2, Column 2
  };
  float kQuantizedTolerance = 2 * (1. / 256);
  // Asymmetric range [-1, 127/128] gives zero-point 128 with scale 1/128.
  const float kMin = -1;
  const float kMax = 127.f / 128.f;
  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(kMin, kMax);

  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  Tensor input_tensor = makeInputTensor<DataType::U8>(
    {1, 2, 2, 3}, quant_param.first, quant_param.second, input_data, memory_manager.get());
  Tensor alpha_tensor = makeInputTensor<DataType::U8>(
    {1, 1, 3}, quant_param.first, quant_param.second, alpha_data, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  kernel.configure();
  memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(dequantizeTensorData(output_tensor),
              FloatArrayNear(ref_output_data, kQuantizedTolerance));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 3}));
  EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
              ::testing::ElementsAreArray(ref_quant_output_data));
}
+
// S16 alpha must be channel-wise quantized; a layer-wise quantized alpha is
// rejected by configure().
TEST(PReluTest, SInt16_LWQ_NEG)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  // Rewrite this test in case layer-wise quantization for sint16 is supported
  std::vector<float> input_data(6); // data is not important
  std::vector<float> alpha_data(6);

  Tensor input_tensor =
    makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, input_data, memory_manager.get());
  Tensor alpha_tensor =
    makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, alpha_data, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  EXPECT_ANY_THROW(kernel.configure());
}
+
// S16 PReLU with channel-wise quantized alpha (one scale per channel of the
// last dimension); output is compared against the float reference.
TEST(PReluTest, SInt16_CWQ_Simple)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f};
  std::vector<float> alpha_data{0.5f, 0.25f};
  std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f};

  std::vector<float> alpha_scales{0.05f, 0.025f};
  std::vector<int32_t> zerop{0, 0};
  Tensor input_tensor =
    makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
  Tensor alpha_tensor =
    makeInputTensor<DataType::S16>({2}, alpha_scales, zerop, 0, alpha_data, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0);

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  kernel.configure();
  memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2}));
  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}
+
// Alpha quantized over the last dimension but with spatial extent (dims
// before the channel dim are not all 1) must be rejected.
TEST(PReluTest, SInt16_CWQ_spatial_alpha_NEG)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  std::vector<float> input_data(6); // data is not important
  std::vector<float> alpha_data(6);

  std::vector<float> alpha_scales{0.25f, 0.05f};
  std::vector<int32_t> zerop{0, 0};
  Tensor input_tensor =
    makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
  Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, alpha_scales, zerop, 3,
                                                       alpha_data, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  EXPECT_ANY_THROW(kernel.configure());
}
+
// Alpha quantized over a dimension other than the last one (dim 1 here)
// must be rejected by configure().
TEST(PReluTest, SInt16_CWQ_wrong_dim_quant_NEG)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  std::vector<float> input_data(6); // data is not important
  std::vector<float> alpha_data(6);

  std::vector<float> alpha_scales{0.25f};
  std::vector<int32_t> zerop{0};
  Tensor input_tensor =
    makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
  Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 1, 2}, alpha_scales, zerop, 1,
                                                       alpha_data, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  EXPECT_ANY_THROW(kernel.configure());
}
+
// CWQ alpha with a rank lower than the input (rank 3 vs rank 4) is still
// accepted as long as its last dimension matches the input's.
TEST(PReluTest, SInt16_CWQ_uneven_shape1)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f};
  std::vector<float> alpha_data{0.5f, 0.25f};
  std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f};

  std::vector<float> alpha_scales{0.05f, 0.025f};
  std::vector<int32_t> zerop{0, 0};
  Tensor input_tensor =
    makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
  Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 2}, alpha_scales, zerop, 2,
                                                       alpha_data, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0);

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  kernel.configure();
  memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2}));
  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}
+
// CWQ alpha of shape {1, 1, 1, 3} broadcast over a {1, 2, 2, 3} input, with
// a distinct scale per channel (including a negative alpha value).
TEST(PReluTest, SInt16_CWQ_uneven_shape2)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  std::vector<float> input_data{
    0.0f, 0.0f, 0.0f,       // Row 1, Column 1
    0.5f, 0.5f, 0.5f,       // Row 1, Column 2
    -1.0f, -1.0f, -1.0f,    // Row 2, Column 1
    -0.25f, -0.25f, -0.25f, // Row 2, Column 2
  };
  std::vector<float> alpha_data{0.0f, 0.5f, -0.5f};
  std::vector<float> ref_output_data{
    0.0f, 0.0f, 0.0f,     // Row 1, Column 1
    0.5f, 0.5f, 0.5f,     // Row 1, Column 2
    0.0f, -0.5f, 0.5f,    // Row 2, Column 1
    0.0f, -0.125f, 0.125f // Row 2, Column 2
  };

  std::vector<float> alpha_scales{1.f, 0.05f, 0.1f};
  std::vector<int32_t> zerop{0, 0, 0};
  Tensor input_tensor =
    makeInputTensor<DataType::S16>({1, 2, 2, 3}, 0.01, 0, input_data, memory_manager.get());
  Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 1, 3}, alpha_scales, zerop, 3,
                                                       alpha_data, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.001, 0);

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  kernel.configure();
  memory_manager->allocate_memory(output_tensor);
  kernel.execute();

  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 3}));
  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}
+
+TEST(PReluTest, Input_Output_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PReluTest, Input_Alpha_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::U8>({1}, {1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
// S64 passes configure() (only type *matching* is checked there) but must
// throw in execute(), which only dispatches FLOAT32/U8/S16.
TEST(PReluTest, Invalid_Input_Type_NEG)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
  Tensor alpha_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::S64);

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  kernel.configure();
  memory_manager->allocate_memory(output_tensor);
  EXPECT_ANY_THROW(kernel.execute());
}
+
// U8 input/output with more than one scale (channel-wise quantization) is
// not supported; makeInputTensor is used for the output on purpose so CWQ
// params can be attached to it.
TEST(PReluTest, Input_Output_U8_CWQ_NEG)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  std::vector<float> scales{1.f, 1.f};
  std::vector<int32_t> zerop{0, 0};
  std::vector<float> dummy_data(4, 0.f);
  Tensor input_tensor =
    makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
  Tensor alpha_tensor =
    makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
  Tensor output_tensor =
    makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  EXPECT_ANY_THROW(kernel.configure());
}
+
// S16 input/output with channel-wise quantization (multiple scales) is not
// supported either; only alpha may be CWQ.
TEST(PReluTest, Input_Output_S16_CWQ_NEG)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  std::vector<float> scales{1.f, 1.f};
  std::vector<int32_t> zerop{0, 0};
  std::vector<float> dummy_data(4, 0.f);
  Tensor input_tensor =
    makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
  Tensor alpha_tensor =
    makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
  Tensor output_tensor =
    makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  EXPECT_ANY_THROW(kernel.configure());
}
+
// Mixed U8 input and S16 alpha violates the type-equality checks in
// configure().
TEST(PReluTest, Mixing_U8_S16_NEG)
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
  std::vector<float> dummy_data(4, 0.f);
  Tensor input_tensor =
    makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data, memory_manager.get());
  Tensor alpha_tensor =
    makeInputTensor<DataType::S16>({2, 2}, 1.f, 0, dummy_data, memory_manager.get());
  Tensor output_tensor =
    makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data, memory_manager.get());

  PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
  EXPECT_ANY_THROW(kernel.configure());
}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pack.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.cpp
new file mode 100644
index 000000000..42aab330c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pack.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Pack kernel: stacks 'values_count' equally-shaped input tensors along a new
+// axis, producing one output of rank (input rank + 1).
+Pack::Pack(std::vector<const Tensor *> inputs, Tensor *output, const PackParams &params)
+  : KernelWithParams<PackParams>(std::move(inputs), {output}, params)
+{
+}
+
+// Validates inputs (count, dtype, matching shapes, quantization params) and
+// computes the output shape with the new axis inserted at 'axis'.
+void Pack::configure()
+{
+  // Number of supplied inputs must match the declared values_count.
+  LUCI_INTERPRETER_CHECK(_inputs.size() == static_cast<uint32_t>(params().values_count));
+  const Tensor *t0 = _inputs[0];
+  // Output rank is input rank + 1 (one new axis is inserted).
+  const int dimension_size = t0->shape().num_dims() + 1;
+  int axis = params().axis;
+  if (axis < 0)
+  {
+    axis += dimension_size; // normalize negative axis
+  }
+  LUCI_INTERPRETER_CHECK(axis >= 0 && axis <= t0->shape().num_dims());
+
+  if (t0->element_type() != DataType::S32 && t0->element_type() != DataType::FLOAT32 &&
+      t0->element_type() != DataType::U8 && t0->element_type() != DataType::S8 &&
+      t0->element_type() != DataType::S16 && t0->element_type() != DataType::S64)
+  {
+    throw std::runtime_error("Unsupported type.");
+  }
+
+  // Every input must share t0's element type and exact shape.
+  for (uint32_t i = 1; i < _inputs.size(); ++i)
+  {
+    const Tensor *tensor = _inputs[i];
+    LUCI_INTERPRETER_CHECK(tensor->element_type() == t0->element_type());
+    LUCI_INTERPRETER_CHECK(tensor->shape().num_dims() == t0->shape().num_dims());
+    for (int d = 0; d < t0->shape().num_dims(); ++d)
+    {
+      LUCI_INTERPRETER_CHECK(tensor->shape().dim(d) == t0->shape().dim(d));
+    }
+  }
+
+  // Build the output shape: values_count at 'axis', input dims elsewhere.
+  Shape output_shape(dimension_size);
+  int input_dim = 0;
+  for (int index = 0; index < dimension_size; ++index)
+  {
+    if (index == axis)
+    {
+      output_shape.dim(index) = params().values_count;
+    }
+    else
+    {
+      output_shape.dim(index) = t0->shape().dim(input_dim++);
+    }
+  }
+
+  if (t0->element_type() == DataType::U8 || t0->element_type() == DataType::S8 ||
+      t0->element_type() == DataType::S16)
+  {
+    LUCI_INTERPRETER_CHECK(output()->zero_point() == t0->zero_point());
+    LUCI_INTERPRETER_CHECK(output()->scale() == t0->scale());
+    // Guarantee input/output quantization params match as we do not support
+    // packing quantized tensors.
+    // (Loop index renamed from 'i' to avoid shadowing the shape index above.)
+    for (int input_idx = 0; input_idx < params().values_count; input_idx++)
+    {
+      LUCI_INTERPRETER_CHECK(_inputs[input_idx]->zero_point() == t0->zero_point());
+      LUCI_INTERPRETER_CHECK(_inputs[input_idx]->scale() == t0->scale());
+    }
+  }
+
+  output()->resize(output_shape);
+}
+
+// Dispatches to the typed evalGeneric<T>() based on the first input's dtype.
+void Pack::execute() const
+{
+  switch (_inputs[0]->element_type())
+  {
+    case DataType::FLOAT32:
+      evalGeneric<float>();
+      break;
+    case DataType::U8:
+      evalGeneric<uint8_t>();
+      break;
+    case DataType::S8:
+      evalGeneric<int8_t>();
+      break;
+    case DataType::S16:
+      evalGeneric<int16_t>();
+      break;
+    case DataType::S32:
+      evalGeneric<int32_t>();
+      break;
+    case DataType::S64:
+      evalGeneric<int64_t>();
+      break;
+    default:
+      // configure() already rejects other dtypes; this is a safety net.
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Stacks all inputs along the normalized axis via the TFLite reference Pack op.
+template <typename T> void Pack::evalGeneric() const
+{
+  const Tensor *t0 = _inputs[0];
+  const int dimension_size = t0->shape().num_dims() + 1;
+  // Negative axis is re-normalized here, mirroring configure().
+  int axis = params().axis;
+  if (axis < 0)
+  {
+    axis += dimension_size;
+  }
+
+  VectorOfTensors<T, true> inputs(_inputs);
+  tflite::PackParams params{};
+  params.axis = axis;
+  params.inputs_count = _inputs.size();
+  tflite::reference_ops::Pack<T>(params, inputs.shapes(), inputs.data(), getTensorShape(output()),
+                                 getTensorData<T>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pack.h b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.h
new file mode 100644
index 000000000..4a2fcfd80
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_PACK_H
+#define LUCI_INTERPRETER_KERNELS_PACK_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Pack kernel: stacks 'values_count' equally-shaped inputs along a new axis.
+class Pack : public KernelWithParams<PackParams>
+{
+public:
+  Pack(std::vector<const Tensor *> inputs, Tensor *output, const PackParams &params);
+
+  // Accessors for the kernel's I/O tensors.
+  const Tensor *input(int index) const { return _inputs[index]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // Typed implementation shared by all supported dtypes.
+  template <typename T> void evalGeneric() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_PACK_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pack.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.test.cpp
new file mode 100644
index 000000000..d16320b78
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.test.cpp
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pack.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Builds input/output tensors of dtype T, runs the Pack kernel, and verifies
+// the packed data and output shape.
+// Quantization params: U8/S8 use {1/255, 128}; S16 uses {1.0, 0}; float and
+// wide integer types are unquantized.
+template <typename T>
+void Check(std::vector<std::initializer_list<int32_t>> input_shapes,
+           std::initializer_list<int32_t> output_shape, std::vector<std::vector<T>> input_datas,
+           std::initializer_list<T> output_data, int32_t axis)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  constexpr DataType element_type = getElementType<T>();
+  std::vector<const Tensor *> inputs(input_datas.size());
+  std::vector<Tensor> tmp_inputs;
+  // Reserve up front: a reallocation would copy/move Tensors whose buffers
+  // were already registered with the memory manager, invalidating them.
+  tmp_inputs.reserve(input_datas.size());
+  for (size_t i = 0; i < input_datas.size(); i++)
+  {
+    // Branch only on how the tensor is quantized; allocation and data
+    // writing are identical for every dtype and hoisted below.
+    if (std::is_same<T, float>::value || std::is_same<T, int32_t>::value ||
+        std::is_same<T, int64_t>::value)
+    {
+      tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {}, ""));
+    }
+    else if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value)
+    {
+      tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f / 255}, {128}}, ""));
+    }
+    else
+    {
+      assert((std::is_same<T, int16_t>::value) && "unexpected dtype is tested");
+      tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f}, {0}}, ""));
+    }
+    memory_manager->allocate_memory(tmp_inputs[i]);
+    tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T));
+  }
+  // Collect pointers only after all push_backs so they reference final storage.
+  for (size_t i = 0; i < input_datas.size(); i++)
+  {
+    inputs[i] = &tmp_inputs[i];
+  }
+
+  Tensor output_tensor = makeOutputTensor(element_type);
+  if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value)
+  {
+    output_tensor = makeOutputTensor(element_type, 1.0f / 255, 128);
+  }
+  else if (std::is_same<T, int16_t>::value)
+  {
+    output_tensor = makeOutputTensor(element_type, 1.0f, 0);
+  }
+
+  PackParams params{};
+  params.axis = axis;
+  params.values_count = input_datas.size();
+  Pack kernel(inputs, &output_tensor, params);
+
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+// Typed test fixture instantiated for every dtype the Pack kernel supports.
+template <typename T> class PackTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<uint8_t, int8_t, int16_t, int32_t, int64_t, float>;
+TYPED_TEST_SUITE(PackTest, DataTypes);
+
+// Packs three 1-D inputs along axis 0 -> output shape {3, 2}.
+TYPED_TEST(PackTest, ThreeInputs)
+{
+  Check<TypeParam>(/*input_shapes=*/{{2}, {2}, {2}},
+                   /*output_shape=*/{3, 2},
+                   /*input_datas=*/
+                   {{1, 4}, {2, 5}, {3, 6}},
+                   /*output_data=*/
+                   {1, 4, 2, 5, 3, 6}, /*axis=*/0);
+
+  SUCCEED();
+}
+
+// axis=-1 normalizes to the last axis -> output shape {2, 3}, data interleaved.
+TYPED_TEST(PackTest, NegAxis)
+{
+  Check<TypeParam>(/*input_shapes=*/{{2}, {2}, {2}},
+                   /*output_shape=*/{2, 3},
+                   /*input_datas=*/
+                   {{1, 4}, {2, 5}, {3, 6}},
+                   /*output_data=*/
+                   {1, 2, 3, 4, 5, 6}, /*axis=*/-1);
+
+  SUCCEED();
+}
+
+// NEG test: three inputs but values_count=2 must fail configure().
+TEST(Pack, MismatchingInputValuesCount_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  std::vector<float> input1_data{1, 4};
+  std::vector<float> input2_data{2, 5};
+  std::vector<float> input3_data{3, 6};
+  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data, memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data, memory_manager.get());
+  Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  PackParams params{};
+  {
+    params.axis = 0;
+    params.values_count = 2; // deliberately inconsistent with 3 inputs
+
+    Pack kernel({&input1_tensor, &input2_tensor, &input3_tensor}, &output_tensor, params);
+    EXPECT_ANY_THROW(kernel.configure());
+  }
+}
+
+// NEG test: axis=2 is outside the valid range [0, rank] for rank-1 inputs.
+TEST(Pack, InvalidInputAxis_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  std::vector<float> input1_data{1, 4};
+  std::vector<float> input2_data{2, 5};
+  std::vector<float> input3_data{3, 6};
+  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data, memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data, memory_manager.get());
+  Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  PackParams params{};
+  {
+    params.axis = 2; // deliberately out of range
+    params.values_count = 3;
+
+    Pack kernel({&input1_tensor, &input2_tensor, &input3_tensor}, &output_tensor, params);
+    EXPECT_ANY_THROW(kernel.configure());
+  }
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pad.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.cpp
new file mode 100644
index 000000000..c07f6e310
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pad.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/pad.h>
+
+#include <limits>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Pad kernel: pads 'input' per the [N, 2] S32 'paddings' tensor.
+Pad::Pad(const Tensor *input, const Tensor *paddings, Tensor *output)
+  : Kernel({input, paddings}, {output})
+{
+}
+
+// Validates inputs and computes the padded output shape.
+// NOTE(review): validation uses assert(), which is compiled out in NDEBUG
+// builds — unlike the LUCI_INTERPRETER_CHECK used by other kernels. Confirm
+// this is intentional before relying on these checks in release builds.
+void Pad::configure()
+{
+  const Shape &input_shape = input()->shape();
+  const int num_dims = input_shape.num_dims();
+
+  if (num_dims > 4)
+    throw std::runtime_error("Unsupported number of dimensions.");
+
+  assert(output()->element_type() == input()->element_type());
+  assert(paddings()->element_type() == DataType::S32);
+  // Paddings shape should be [N, 2].
+  assert(paddings()->shape().num_dims() == 2);
+  assert(paddings()->shape().dim(0) == num_dims);
+  assert(paddings()->shape().dim(1) == 2);
+
+  Shape output_shape(num_dims);
+  const auto *paddings_data = getTensorData<int32_t>(paddings());
+  for (int i = 0; i < num_dims; ++i)
+  {
+    // Row i of paddings holds [before, after] counts for dimension i.
+    const int32_t padding_before = paddings_data[i * 2];
+    const int32_t padding_after = paddings_data[i * 2 + 1];
+    assert(padding_before >= 0 && padding_after >= 0);
+    output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after;
+  }
+
+  output()->resize(output_shape);
+}
+
+// Runs the TFLite reference Pad. The fill value is 0.0f for float tensors and
+// the output zero point for quantized (U8/S8) tensors.
+void Pad::execute() const
+{
+  const int num_dims = input()->shape().num_dims();
+
+  tflite::PadParams params{};
+  params.left_padding_count = num_dims;
+  params.right_padding_count = num_dims;
+
+  // Copy [before, after] pairs into the TFLite params struct.
+  const auto *paddings_data = getTensorData<int32_t>(paddings());
+  for (int i = num_dims - 1; i >= 0; --i)
+  {
+    params.left_padding[i] = paddings_data[i * 2];
+    params.right_padding[i] = paddings_data[i * 2 + 1];
+  }
+
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+    {
+      const float pad_value = 0.0f;
+      tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<float>(input()),
+                                 &pad_value, getTensorShape(output()),
+                                 getTensorData<float>(output()));
+      break;
+    }
+    case DataType::U8:
+    {
+      // Zero point must fit in the quantized type before narrowing.
+      assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
+      assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
+      const auto pad_value = static_cast<uint8_t>(output()->zero_point());
+      tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+                                 &pad_value, getTensorShape(output()),
+                                 getTensorData<uint8_t>(output()));
+      break;
+    }
+    case DataType::S8:
+    {
+      assert(output()->zero_point() >= std::numeric_limits<int8_t>::min());
+      assert(output()->zero_point() <= std::numeric_limits<int8_t>::max());
+      const auto pad_value = static_cast<int8_t>(output()->zero_point());
+      tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<int8_t>(input()),
+                                 &pad_value, getTensorShape(output()),
+                                 getTensorData<int8_t>(output()));
+      break;
+    }
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pad.h b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.h
new file mode 100644
index 000000000..e05b47f29
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_PAD_H
+#define LUCI_INTERPRETER_KERNELS_PAD_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Pad kernel: pads 'input' per the [N, 2] 'paddings' tensor (see Pad.cpp).
+class Pad : public Kernel
+{
+public:
+  Pad(const Tensor *input, const Tensor *paddings, Tensor *output);
+
+  // Accessors for the kernel's I/O tensors.
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *paddings() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_PAD_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pad.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.test.cpp
new file mode 100644
index 000000000..dd3ce947c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.test.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pad.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Max dequantization error for a [min, max] range quantized to 8 bits.
+float GetTolerance(float min, float max) { return (max - min) / 255.0; }
+
+// Pads a quantized U8 tensor; padded cells hold the zero point (0.0f after
+// dequantization).
+TEST(Pad, Uint8)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
+  std::vector<float> input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3};
+  std::vector<int32_t> paddings_data{0, 0, 0, 2, 1, 3, 0, 0};
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+  Pad kernel(&input_tensor, &paddings_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{0, -0.8, 0.2, 0.9, 0, 0, 0, 0, 0.7, 0.1, -0.3, 0, 0, 0,
+                                     0, 0,    0,   0,   0, 0, 0, 0, 0,   0,   0,    0, 0, 0};
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(ref_output_data, kQuantizedTolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 7, 1}));
+}
+
+// Same as the U8 case but for S8 quantization.
+TEST(Pad, Int8)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  std::pair<float, int32_t> quant_param = quantizationParams<int8_t>(-1.0f, 1.0f);
+  std::vector<float> input_data{-0.2, 0.4, 0.5, -0.7, -0.1, -0.9, 0.7, 0.1, 0.2};
+  std::vector<int32_t> paddings_data{0, 0, 1, 2, 2, 1, 0, 0};
+  Tensor input_tensor = makeInputTensor<DataType::S8>(
+    {1, 3, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second);
+
+  Pad kernel(&input_tensor, &paddings_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{0, 0, 0,    0,    0,    0, 0, 0, -0.2, 0.4, 0.5, 0,
+                                     0, 0, -0.7, -0.1, -0.9, 0, 0, 0, 0.7,  0.1, 0.2, 0,
+                                     0, 0, 0,    0,    0,    0, 0, 0, 0,    0,   0,   0};
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(ref_output_data, kQuantizedTolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 6, 6, 1}));
+}
+
+// Pads a float tensor; padded cells are filled with 0.0f.
+TEST(Pad, Float)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6};
+  std::vector<int32_t> paddings_data{1, 0, 0, 2, 0, 3, 0, 0};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Pad kernel(&input_tensor, &paddings_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                     0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, 0, 0, 4, 5,
+                                     6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  std::initializer_list<int32_t> ref_output_shape{2, 4, 6, 1};
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.cpp
new file mode 100644
index 000000000..197cdaa69
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.cpp
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/PadV2.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/pad.h>
+
+#include <limits>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// PadV2 kernel: like Pad, but the fill value comes from the single-element
+// 'constant_values' tensor instead of being fixed at zero.
+PadV2::PadV2(const Tensor *input, const Tensor *paddings, const Tensor *constant_values,
+             Tensor *output)
+  : Kernel({input, paddings, constant_values}, {output})
+{
+}
+
+// Validates inputs (including the scalar constant_values tensor) and computes
+// the padded output shape.
+// NOTE(review): assert()-based validation vanishes under NDEBUG; confirm this
+// matches the project's release-build expectations.
+void PadV2::configure()
+{
+  const Shape &input_shape = input()->shape();
+  const int num_dims = input_shape.num_dims();
+
+  if (num_dims > 4)
+    throw std::runtime_error("Unsupported number of dimensions.");
+
+  assert(output()->element_type() == input()->element_type());
+  assert(paddings()->element_type() == DataType::S32);
+  assert(constant_values()->element_type() == output()->element_type());
+  // Paddings shape should be [N, 2].
+  assert(paddings()->shape().num_dims() == 2);
+  assert(paddings()->shape().dim(0) == num_dims);
+  assert(paddings()->shape().dim(1) == 2);
+  // Constant values elements number should be 1.
+  assert(constant_values()->shape().num_elements() == 1);
+
+  Shape output_shape(num_dims);
+  const auto *paddings_data = getTensorData<int32_t>(paddings());
+  for (int i = 0; i < num_dims; ++i)
+  {
+    // Row i of paddings holds [before, after] counts for dimension i.
+    const int32_t padding_before = paddings_data[i * 2];
+    const int32_t padding_after = paddings_data[i * 2 + 1];
+    assert(padding_before >= 0 && padding_after >= 0);
+    output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after;
+  }
+
+  output()->resize(output_shape);
+}
+
+// Runs the TFLite reference Pad with the fill value read from the scalar
+// constant_values tensor.
+// NOTE(review): only FLOAT32 and U8 are handled here, while Pad::execute also
+// supports S8 — confirm whether the omission is intentional.
+void PadV2::execute() const
+{
+  const int num_dims = input()->shape().num_dims();
+
+  tflite::PadParams params{};
+  params.left_padding_count = num_dims;
+  params.right_padding_count = num_dims;
+
+  // Copy [before, after] pairs into the TFLite params struct.
+  const auto *paddings_data = getTensorData<int32_t>(paddings());
+  for (int i = num_dims - 1; i >= 0; --i)
+  {
+    params.left_padding[i] = paddings_data[i * 2];
+    params.right_padding[i] = paddings_data[i * 2 + 1];
+  }
+
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+    {
+      const auto pad_value = getTensorData<float>(constant_values())[0];
+      tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<float>(input()),
+                                 &pad_value, getTensorShape(output()),
+                                 getTensorData<float>(output()));
+      break;
+    }
+    case DataType::U8:
+    {
+      assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
+      assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
+      const auto pad_value = getTensorData<uint8_t>(constant_values())[0];
+      tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+                                 &pad_value, getTensorShape(output()),
+                                 getTensorData<uint8_t>(output()));
+      break;
+    }
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.h b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.h
new file mode 100644
index 000000000..48a31f584
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_PAD_V2_H
+#define LUCI_INTERPRETER_KERNELS_PAD_V2_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// PadV2 kernel: pads 'input' with a caller-supplied scalar fill value.
+class PadV2 : public Kernel
+{
+public:
+  PadV2(const Tensor *input, const Tensor *paddings, const Tensor *constant_values, Tensor *output);
+
+  // Accessors for the kernel's I/O tensors.
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *paddings() const { return _inputs[1]; }
+  const Tensor *constant_values() const { return _inputs[2]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_PAD_V2_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.test.cpp
new file mode 100644
index 000000000..41efaff06
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/PadV2.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Max dequantization error for a [min, max] range quantized to 8 bits.
+float GetTolerance(float min, float max) { return (max - min) / 255.0; }
+
+// Pads a quantized U8 tensor with constant value 0.5.
+TEST(PadV2, Uint8)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
+  std::vector<float> input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3};
+  std::vector<int32_t> paddings_data{0, 0, 0, 2, 1, 3, 0, 0};
+  std::vector<float> constant_values_data{0.5};
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+  Tensor constant_values = makeInputTensor<DataType::U8>(
+    {1}, quant_param.first, quant_param.second, constant_values_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+  PadV2 kernel(&input_tensor, &paddings_tensor, &constant_values, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data = {
+    0.5, -0.8, 0.2, 0.9, 0.5, 0.5, 0.5, 0.5, 0.7, 0.1, -0.3, 0.5, 0.5, 0.5, //
+    0.5, 0.5,  0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,  0.5, 0.5, 0.5}; //
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(ref_output_data, kQuantizedTolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 7, 1}));
+}
+
+// Pads a float tensor with constant value 7.
+TEST(PadV2, Float)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6};
+  std::vector<int32_t> paddings_data{1, 0, 0, 2, 0, 3, 0, 0};
+  std::vector<float> constant_values_data{7};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+  Tensor constant_values =
+    makeInputTensor<DataType::FLOAT32>({1}, constant_values_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  PadV2 kernel(&input_tensor, &paddings_tensor, &constant_values, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+                                     7, 7, 7, 7, 7, 7, 7, 7, 1, 2, 3, 7, 7, 7, 4, 5,
+                                     6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7};
+  std::initializer_list<int32_t> ref_output_shape{2, 4, 6, 1};
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pow.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.cpp
new file mode 100644
index 000000000..722c64024
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pow.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Pow::Pow(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void Pow::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Pow::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ eval<float>();
+ break;
+ case DataType::S32:
+ eval<int32_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> void Pow::eval() const
+{
+ tflite::ArithmeticParams params{};
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastPow4DSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Pow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pow.h b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.h
new file mode 100644
index 000000000..8ff865e40
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_POW_H
+#define LUCI_INTERPRETER_KERNELS_POW_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Pow : public Kernel
+{
+public:
+ Pow(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void eval() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_POW_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pow.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.test.cpp
new file mode 100644
index 000000000..0e858115d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.test.cpp
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pow.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class PowTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(PowTest, SimplePow)
+{
+ std::initializer_list<int32_t> base_shape = {1, 1, 3, 2};
+
+ std::vector<float> input1_data{0.3f, 2.3f, 0.9f, 0.5f, 0.8f, 1.1f};
+ std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ std::vector<float> test_outputs{0.786f, 1.2838f, 1.043f, 0.7071f, 0.8f, 1.08956f};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape));
+}
+
+TEST_F(PowTest, FloatBroadcastPow)
+{
+ std::initializer_list<int32_t> input1_shape = {1, 3};
+ std::initializer_list<int32_t> input2_shape = {3, 1};
+
+ std::vector<float> input1_data{0.3f, 2.3f, 0.9f};
+ std::vector<float> input2_data{0.2f, 0.3f, 0.4f};
+ std::vector<float> test_outputs{0.786f, 1.18126f, 0.9791f, 0.6968f, 1.28386f,
+ 0.96888f, 0.6178f, 1.3953f, 0.9587f};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
+}
+
+TEST_F(PowTest, IntPow)
+{
+ std::initializer_list<int32_t> base_shape = {1, 3};
+
+ std::vector<int32_t> input_data{2, 3, 4};
+ std::vector<int32_t> test_outputs{4, 27, 256};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>(base_shape, input_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::S32>(base_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(test_outputs));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape));
+}
+
+TEST_F(PowTest, Input_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(PowTest, Input_Type_Mismatch_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {4}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(PowTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.cpp
new file mode 100644
index 000000000..0c8544a65
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Quantize.h"
+#include "kernels/Utils.h"
+#include "PALQuantize.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+template <typename input_dtype> void call_requantize(const Tensor *input, Tensor *output)
+{
+ int32_t multiplier;
+ int shift;
+
+ const double effective_output_scale = input->scale() / output->scale();
+ quantizeMultiplier(effective_output_scale, &multiplier, &shift);
+
+ const auto input_shape = getTensorShape(input);
+ const auto output_shape = getTensorShape(output);
+ const auto size = tflite::MatchingFlatSize(input_shape, output_shape);
+
+ const auto input_data = getTensorData<input_dtype>(input);
+
+ switch (output->element_type())
+ {
+ case loco::DataType::S8:
+ luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+ output->zero_point(), getTensorData<int8_t>(output));
+ break;
+ case loco::DataType::U8:
+ luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+ output->zero_point(), getTensorData<uint8_t>(output));
+ break;
+ case loco::DataType::S16:
+ luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+ output->zero_point(), getTensorData<int16_t>(output));
+ break;
+ default:
+ throw std::runtime_error("Unsupported quantized type, yet!");
+ }
+}
+
+} // namespace
+
+Quantize::Quantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Quantize::configure()
+{
+
+ if (input()->element_type() == loco::DataType::S16)
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);
+
+ switch (input()->element_type())
+ {
+ case loco::DataType::FLOAT32:
+ {
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::U8 ||
+ output()->element_type() == loco::DataType::S8 ||
+ output()->element_type() == loco::DataType::S16);
+ break;
+ }
+ case loco::DataType::S16:
+ case loco::DataType::S8:
+ case loco::DataType::U8:
+ {
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8 ||
+ output()->element_type() == loco::DataType::U8 ||
+ output()->element_type() == loco::DataType::S16);
+ if (output()->element_type() == loco::DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == 0);
+ }
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type");
+ }
+
+ output()->resize(input()->shape());
+}
+
+void Quantize::execute() const
+{
+ switch (input()->element_type())
+ {
+ case loco::DataType::FLOAT32:
+ {
+ tflite::QuantizationParams op_params;
+ op_params.zero_point = output()->zero_point();
+ op_params.scale = output()->scale();
+ const auto input_data = getTensorData<float>(input());
+
+ switch (output()->element_type())
+ {
+ case loco::DataType::S8:
+ {
+ luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+ getTensorShape(output()), getTensorData<int8_t>(output()));
+ break;
+ }
+ case loco::DataType::U8:
+ {
+ luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+ getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ }
+ case loco::DataType::S16:
+ {
+ luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+ getTensorShape(output()),
+ getTensorData<int16_t>(output()));
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+ break;
+ }
+ case loco::DataType::S16:
+ {
+ call_requantize<int16_t>(input(), output());
+ break;
+ }
+ case loco::DataType::S8:
+ {
+ call_requantize<int8_t>(input(), output());
+ break;
+ }
+ case loco::DataType::U8:
+ {
+ call_requantize<uint8_t>(input(), output());
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.h b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.h
new file mode 100644
index 000000000..006c5366f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_QUANTIZE_H
+#define LUCI_INTERPRETER_KERNELS_QUANTIZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Quantize : public Kernel
+{
+public:
+ Quantize(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_QUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.test.cpp
new file mode 100644
index 000000000..22e67fe3f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.test.cpp
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Quantize.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class QuantizeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(QuantizeTest, FloatUint8)
+{
+ std::vector<float> input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ std::vector<uint8_t> ref_output_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(QuantizeTest, FloatInt8)
+{
+ std::vector<float> input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ std::vector<int8_t> ref_output_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(QuantizeTest, FloatInt16)
+{
+ std::vector<float> input_data{-63.5, -63, -3, -2, -1, 1, 2, 3, 63.5, 64};
+
+ std::vector<int16_t> ref_output_data{-12700, -12600, -600, -400, -200,
+ 200, 400, 600, 12700, 12800};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.005, /*zero_point*/ 0);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int16_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(QuantizeTest, Int16Int16)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<int16_t> ref_output_data{2, 4, 6, 8, 10, 12, 14, 16, 18, 20};
+
+ Tensor input_tensor = makeInputTensor<DataType::S16>(
+ {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.5, /*zero_point*/ 0);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int16_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, Int8Int8)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<int8_t> ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+
+ Tensor input_tensor = makeInputTensor<DataType::S8>(
+ {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ -1, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, Uint8Uint8)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<uint8_t> ref_output_data{129, 131, 133, 135, 137, 139, 141, 143, 145, 147};
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ 127, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, Int16Int8)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<int8_t> ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+
+ Tensor input_tensor = makeInputTensor<DataType::S16>(
+ {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, InvalidInputType_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForFloatInput_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForInt16Input_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForInt8Input_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForUint8Input_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidInputZeroPoint_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 2, 5}, 0.5, -1, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.cpp
new file mode 100644
index 000000000..747ec6cc8
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Relu.h"
+#include "kernels/Utils.h"
+
+#include "PALRelu.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Relu::Relu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Relu::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ if (input()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+ }
+
+ if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
+ {
+ double multiplier = input()->scale() / output()->scale();
+ quantizeMultiplier(multiplier, &_output_multiplier, &_output_shift);
+ }
+ output()->resize(input()->shape());
+}
+
+void Relu::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Relu::evalFloat() const
+{
+ const auto input_data = getTensorData<float>(input());
+ const auto input_shape = getTensorShape(input());
+ auto output_data = getTensorData<float>(output());
+ auto output_shape = getTensorShape(output());
+
+ luci_interpreter_pal::Relu(input_shape, input_data, output_shape, output_data);
+}
+
+void Relu::evalQuantized() const
+{
+ tflite::ReluParams params;
+ params.input_offset = input()->zero_point();
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = _output_multiplier;
+ params.output_shift = _output_shift;
+
+ params.quantized_activation_min =
+ std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
+ params.quantized_activation_max = static_cast<int32_t>(std::numeric_limits<uint8_t>::max());
+
+ luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+void Relu::evalQuantizedS16() const
+{
+ const auto *input_data = getTensorData<int16_t>(input());
+ auto *output_data = getTensorData<int16_t>(output());
+
+ constexpr int32_t output_min = 0;
+ constexpr int32_t output_max = std::numeric_limits<int16_t>::max();
+
+ const int32_t num_elements = input()->shape().num_elements();
+
+ for (int32_t i = 0; i < num_elements; ++i)
+ {
+ const int32_t input_val = input_data[i];
+ int32_t output_val =
+ tflite::MultiplyByQuantizedMultiplier(input_val, _output_multiplier, _output_shift);
+ output_val = std::max(output_val, output_min);
+ output_val = std::min(output_val, output_max);
+ output_data[i] = static_cast<int16_t>(output_val);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu.h b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.h
new file mode 100644
index 000000000..b813f0cdf
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RELU_H
+#define LUCI_INTERPRETER_KERNELS_RELU_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Relu : public Kernel
+{
+public:
+ Relu(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void evalQuantizedS16() const;
+
+private:
+ int32_t _output_multiplier{0};
+ int32_t _output_shift{0};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RELU_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.test.cpp
new file mode 100644
index 000000000..bd32e3cc9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.test.cpp
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Relu.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ReluTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(ReluTest, FloatSimple)
+{
+ std::vector<float> input_data{
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ };
+
+ std::vector<float> ref_output_data{
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, 0.0f, 0.0f, // Row 2
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(ReluTest, Uint8Quantized)
+{
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 7, 1, //
+ };
+ // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+ const float f_min = (-128.0 / 128.0) * 8;
+ const float f_max = (127.0 / 128.0) * 8;
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({128, 128, 160, 192, 176, 128, 240, 144}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0, 0, 2, 4, 3, 0, 7, 1}));
+}
+
+TEST_F(ReluTest, Uint8Requantized)
+{
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 7, 1, //
+ };
+
+ // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+ const float in_min = (-128.0 / 128.0) * 8;
+ const float in_max = (127.0 / 128.0) * 8;
+ const float out_min = (0.0 / 256.0) * 8;
+ const float out_max = (255.0 / 256.0) * 8;
+
+ std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_input.first, quant_input.second, input_data, _memory_manager.get());
+
+ std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max);
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({0, 0, 64, 128, 96, 0, 224, 32}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0, 0, 2, 4, 3, 0, 7, 1}));
+}
+
+TEST_F(ReluTest, SInt16)
+{
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 7, 1, //
+ };
+ std::vector<float> ref_output_data{
+ 0, 0, 2, 4, //
+ 3, 0, 7, 1, //
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 2, 4, 1}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.25, 0);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(ReluTest, Input_Output_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ReluTest, Invalid_Input_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.cpp
new file mode 100644
index 000000000..07205ed3a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Relu6.h"
+#include "kernels/Utils.h"
+
+#include "PALRelu6.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Relu6::Relu6(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Relu6::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ if (input()->element_type() == DataType::U8)
+ {
+ double multiplier = input()->scale() / output()->scale();
+ quantizeMultiplier(multiplier, &_output_multiplier, &_output_shift);
+ }
+ output()->resize(input()->shape());
+}
+
+void Relu6::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Relu6::evalFloat() const
+{
+ const auto input_data = getTensorData<float>(input());
+ const auto input_shape = getTensorShape(input());
+ auto output_data = getTensorData<float>(output());
+ auto output_shape = getTensorShape(output());
+
+ luci_interpreter_pal::Relu6(input_shape, input_data, output_shape, output_data);
+}
+
+void Relu6::evalQuantized() const
+{
+ tflite::ReluParams params;
+ params.input_offset = input()->zero_point();
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = _output_multiplier;
+ params.output_shift = _output_shift;
+
+ params.quantized_activation_min =
+ std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
+ params.quantized_activation_max =
+ std::min(static_cast<int32_t>(std::numeric_limits<uint8_t>::max()),
+ params.output_offset + static_cast<int32>(roundf(6.f / output()->scale())));
+
+ luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.h b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.h
new file mode 100644
index 000000000..f5030b588
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RELU6_H
+#define LUCI_INTERPRETER_KERNELS_RELU6_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Relu6 : public Kernel
+{
+public:
+ Relu6(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _output_multiplier{0};
+ int32_t _output_shift{0};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RELU6_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.test.cpp
new file mode 100644
index 000000000..af7b3f3db
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.test.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Relu6.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class Relu6Test : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(Relu6Test, FloatSimple)
+{
+ std::vector<float> input_data{
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 7.0f, -1.0f, -2.0f, // Row 2
+ };
+
+ std::vector<float> ref_output_data{
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 6.0f, 0.0f, 0.0f, // Row 2
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Relu6 kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(Relu6Test, Uint8Quantized)
+{
+ // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+ const float f_min = (-128.0 / 128.0) * 10;
+ const float f_max = (127.0 / 128.0) * 10;
+ const float tolerance = (f_max - f_min) / 255.0;
+
+ std::vector<float> input_data{
+ 0, -6, 2, 8, //
+ -2, 3, 7, 1, //
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Relu6 kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({128, 128, 154, 205, 128, 166, 205, 141}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear({0, 0, 2, 6, 0, 3, 6, 1}, tolerance));
+}
+
+TEST_F(Relu6Test, Uint8Requantized)
+{
+ // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+ const float in_min = (-128.0 / 128.0) * 10;
+ const float in_max = (127.0 / 128.0) * 10;
+ const float out_min = (0.0 / 256.0) * 0;
+ const float out_max = (255.0 / 256.0) * 6;
+ const float tolerance = (in_max - in_min) / 255.0;
+
+ std::vector<float> input_data{
+ 0, -6, 2, 8, //
+ -2, 3, 7, 1, //
+ };
+
+ std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_input.first, quant_input.second, input_data, _memory_manager.get());
+
+ std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max);
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second);
+
+ Relu6 kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({0, 0, 87, 255, 0, 127, 255, 43}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear({0, 0, 2, 6, 0, 3, 6, 1}, tolerance));
+}
+
+TEST_F(Relu6Test, Input_Output_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Relu6 kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(Relu6Test, Invalid_Input_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Relu6 kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.cpp
new file mode 100644
index 000000000..61d3300b2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Reshape.h"
+
+#include <cassert>
+#include <cstring>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+static Shape extractShapeFromTensor(const Tensor *tensor)
+{
+ assert(tensor->element_type() == DataType::S32);
+ Shape shape(tensor->shape().num_elements());
+ const auto *shape_data = tensor->data<int32_t>();
+ for (int i = 0; i < tensor->shape().num_elements(); ++i)
+ {
+ shape.dim(i) = shape_data[i];
+ }
+ return shape;
+}
+
+static void resolveUnknownDimension(const Shape &input_shape, Shape *output_shape)
+{
+ const int32_t num_input_elements = input_shape.num_elements();
+ int32_t num_output_elements = 1;
+ int unknown_dim_index = -1;
+ for (int i = 0; i < output_shape->num_dims(); ++i)
+ {
+ const int32_t value = output_shape->dim(i);
+ if (value == -1)
+ {
+ assert(unknown_dim_index == -1);
+ unknown_dim_index = i;
+ }
+ else
+ {
+ num_output_elements *= value;
+ }
+ }
+ if (unknown_dim_index != -1)
+ {
+ output_shape->dim(unknown_dim_index) = num_input_elements / num_output_elements;
+ num_output_elements *= output_shape->dim(unknown_dim_index);
+ }
+ assert(num_output_elements == num_input_elements);
+}
+
+Reshape::Reshape(const Tensor *input, const Tensor *shape, Tensor *output)
+ : Kernel({input, shape}, {output})
+{
+}
+
+void Reshape::configure()
+{
+ Shape output_shape = extractShapeFromTensor(shape());
+ resolveUnknownDimension(input()->shape(), &output_shape);
+ output()->resize(output_shape);
+}
+
+void Reshape::execute() const
+{
+ const auto *input_data = input()->data<void>();
+ auto *output_data = output()->data<void>();
+
+ const size_t element_size = getDataTypeSize(input()->element_type());
+ const int32_t num_elements = input()->shape().num_elements();
+ std::memcpy(output_data, input_data, num_elements * element_size);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.h b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.h
new file mode 100644
index 000000000..99b947f77
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RESHAPE_H
+#define LUCI_INTERPRETER_KERNELS_RESHAPE_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Reshape : public Kernel
+{
+public:
+ Reshape(const Tensor *input, const Tensor *shape, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *shape() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RESHAPE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.test.cpp
new file mode 100644
index 000000000..c2ff3ea1b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.test.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Reshape.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ReshapeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// TODO Test types other than FLOAT32.
+
+TEST_F(ReshapeTest, Regular)
+{
+ Shape input_shape{1, 2, 2, 3};
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ Shape shape_shape{2};
+ std::vector<int32_t> shape_data{3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor shape_tensor =
+ makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data));
+}
+
+TEST_F(ReshapeTest, UnknownDimension)
+{
+ Shape input_shape{2, 1, 2, 3};
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ Shape shape_shape{3};
+ std::vector<int32_t> shape_data{2, -1, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor shape_tensor =
+ makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp
new file mode 100644
index 000000000..e2ddd6a7b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeBilinear.h"
+
+#include "kernels/Utils.h"
+
+#include "PALResizeBilinear.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ResizeBilinear::ResizeBilinear(const Tensor *input, const Tensor *size, Tensor *output,
+ const ResizeBilinearParams &params)
+ : KernelWithParams<ResizeBilinearParams>({input, size}, {output}, params)
+{
+}
+
+void ResizeBilinear::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32);
+ if (params().half_pixel_centers && params().align_corners)
+ throw std::runtime_error("If half_pixel_centers is True, align_corners must be False.");
+ LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2);
+ Shape output_shape(4);
+ output_shape.dim(0) = input()->shape().dim(0);
+ output_shape.dim(1) = getTensorData<int32_t>(size())[0];
+ output_shape.dim(2) = getTensorData<int32_t>(size())[1];
+ output_shape.dim(3) = input()->shape().dim(3);
+ output()->resize(output_shape);
+}
+
+void ResizeBilinear::execute() const
+{
+ tflite::ResizeBilinearParams op_params{};
+ op_params.align_corners = params().align_corners;
+ op_params.half_pixel_centers = params().half_pixel_centers;
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::ResizeBilinear(
+ op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::ResizeBilinear(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.h b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.h
new file mode 100644
index 000000000..b7bdc2ab7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ResizeBilinear : public KernelWithParams<ResizeBilinearParams>
+{
+public:
+ ResizeBilinear(const Tensor *input, const Tensor *shape, Tensor *output,
+ const ResizeBilinearParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *size() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
new file mode 100644
index 000000000..933a1128c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
@@ -0,0 +1,255 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeBilinear.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> size_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data,
+ std::initializer_list<int32_t> size_data, std::initializer_list<float> output_data,
+ bool align_corners, bool half_pixel_centers)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeBilinearParams params{};
+ params.align_corners = align_corners;
+ params.half_pixel_centers = half_pixel_centers;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> size_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<int32_t> size_data,
+ std::initializer_list<float> output_data, bool align_corners,
+ bool half_pixel_centers)
+{
+  // The TFLite example uses the uint8 values themselves, which corresponds to a quant param
+  // scale of 1.0f and a zero point of 0.
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, 1.0, 0, input_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1.0, 0);
+
+ ResizeBilinearParams params{};
+ params.align_corners = align_corners;
+ params.half_pixel_centers = half_pixel_centers;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale()));
+}
+
+template <typename T> class ResizeBilinearTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(ResizeBilinearTest, DataTypes);
+
+TYPED_TEST(ResizeBilinearTest, SimpleTest)
+{
+ Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ {3, 3},
+ {
+ 3, 5, 6, //
+ 7, 9, 10, //
+ 9, 11, 12, //
+ 4, 8, 10, //
+ 8, 12, 14, //
+ 10, 14, 16, //
+ },
+ false, false);
+ SUCCEED();
+}
+
+TEST(ResizeBilinearTest, HalfPixelCenterFloatTest)
+{
+ Check<float>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 1, 2, //
+ 3, 4, //
+ 1, 2, //
+ 3, 4 //
+ },
+ {3, 3},
+ {
+ 1, 1.5, 2, //
+ 2, 2.5, 3, //
+ 3, 3.5, 4, //
+ 1, 1.5, 2, //
+ 2, 2.5, 3, //
+ 3, 3.5, 4, //
+ },
+ false, true);
+ SUCCEED();
+}
+
+TEST(ResizeBilinearTest, HalfPixelCenterUint8Test)
+{
+ Check<uint8_t>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 12, 16 //
+ },
+ {3, 3},
+ {
+ 2, 4, 6, //
+ 6, 7, 9, //
+ 9, 10, 12, //
+ 4, 7, 10, //
+ 8, 10, 13, //
+ 12, 14, 16, //
+ },
+ false, true);
+ SUCCEED();
+}
+
+TEST(ResizeBilinearTest, InputShapeInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeBilinearParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeBilinearTest, SizeShapeInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeBilinearParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeBilinearTest, SizeDimInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeBilinearParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeBilinearTest, InvalidParams_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeBilinearParams params{};
+ params.align_corners = true;
+ params.half_pixel_centers = true;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp
new file mode 100644
index 000000000..306cefbc2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeNearestNeighbor.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
+#include "PALResizeNearestNeighbor.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ResizeNearestNeighbor::ResizeNearestNeighbor(const Tensor *input, const Tensor *size,
+ Tensor *output,
+ const ResizeNearestNeighborParams &params)
+ : KernelWithParams<ResizeNearestNeighborParams>({input, size}, {output}, params)
+{
+}
+
+void ResizeNearestNeighbor::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32);
+ LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2);
+ Shape output_shape(4);
+ output_shape.dim(0) = input()->shape().dim(0);
+ output_shape.dim(1) = getTensorData<int32_t>(size())[0];
+ output_shape.dim(2) = getTensorData<int32_t>(size())[1];
+ output_shape.dim(3) = input()->shape().dim(3);
+ output()->resize(output_shape);
+}
+
+void ResizeNearestNeighbor::execute() const
+{
+ tflite::ResizeNearestNeighborParams op_params{};
+ op_params.align_corners = params().align_corners;
+ op_params.half_pixel_centers = params().half_pixel_centers;
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+      tflite::reference_ops::ResizeNearestNeighbor(
+        op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(size()),
+        getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::ResizeNearestNeighbor(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h
new file mode 100644
index 000000000..137d031cf
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ResizeNearestNeighbor : public KernelWithParams<ResizeNearestNeighborParams>
+{
+public:
+ ResizeNearestNeighbor(const Tensor *input, const Tensor *shape, Tensor *output,
+ const ResizeNearestNeighborParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *size() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
new file mode 100644
index 000000000..7ade02a6f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeNearestNeighbor.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> size_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data,
+ std::initializer_list<int32_t> size_data, std::initializer_list<float> output_data,
+ bool align_corners, bool half_pixel_centers)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = align_corners;
+ params.half_pixel_centers = half_pixel_centers;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> size_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<int32_t> size_data,
+ std::initializer_list<float> output_data, bool align_corners,
+ bool half_pixel_centers)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::pair<float, int32_t> quant_param =
+ quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f,
+ std::max(input_data) > 0 ? std::max(input_data) : 0.f);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = align_corners;
+ params.half_pixel_centers = half_pixel_centers;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale()));
+}
+
+template <typename T> class ResizeNearestNeighborTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(ResizeNearestNeighborTest, DataTypes);
+
+TYPED_TEST(ResizeNearestNeighborTest, SimpleTest)
+{
+ Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ {3, 3},
+ {
+ 3, 3, 6, //
+ 3, 3, 6, //
+ 9, 9, 12, //
+ 4, 4, 10, //
+ 4, 4, 10, //
+ 10, 10, 16, //
+ },
+ false, false);
+}
+
+TYPED_TEST(ResizeNearestNeighborTest, AlignCenterTest)
+{
+ Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ {3, 3},
+ {
+ 3, 6, 6, //
+ 9, 12, 12, //
+ 9, 12, 12, //
+ 4, 10, 10, //
+ 10, 16, 16, //
+ 10, 16, 16, //
+ },
+ true, false);
+}
+
+TYPED_TEST(ResizeNearestNeighborTest, HalfPixelCenterTest)
+{
+ Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ {3, 3},
+ {
+ 3, 6, 6, //
+ 9, 12, 12, //
+ 9, 12, 12, //
+ 4, 10, 10, //
+ 10, 16, 16, //
+ 10, 16, 16, //
+ },
+ false, true);
+}
+
+TEST(ResizeNearestNeighborTest, InputShapeInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeNearestNeighborTest, SizeShapeInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeNearestNeighborTest, SizeDimInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.cpp
new file mode 100644
index 000000000..1b6a5cc3b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReverseV2.h"
+#include "kernels/Utils.h"
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+ReverseV2::ReverseV2(const Tensor *input, const Tensor *axes, Tensor *output)
+ : Kernel({input, axes}, {output})
+{
+}
+
+void ReverseV2::configure()
+{
+ assert(axes()->shape().num_dims() == 1);
+ assert(input()->shape().num_dims() >= axes()->shape().num_elements());
+ if (input()->element_type() != DataType::S32 && input()->element_type() != DataType::FLOAT32 &&
+ input()->element_type() != DataType::U8 && input()->element_type() != DataType::S16 &&
+ input()->element_type() != DataType::S64)
+ {
+ throw std::runtime_error("Unsupported input type.");
+ }
+ if (axes()->element_type() != DataType::S32)
+ {
+ throw std::runtime_error("Unsupported axes type.");
+ }
+ if (axes()->shape().num_elements() > 1)
+ {
+ throw std::runtime_error("Current implementation does not support more than 1 axis.");
+ }
+ int axis_value = getTensorData<int32_t>(axes())[0];
+ if (axis_value < 0 || axis_value >= input()->shape().num_dims())
+ {
+ throw std::runtime_error("Invalid axes value");
+ }
+ assert(input()->element_type() == output()->element_type());
+
+ output()->resize(input()->shape());
+}
+
+void ReverseV2::execute() const
+{
+ int axis_value = getTensorData<int32_t>(axes())[0];
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::reference_ops::Reverse<float>(axis_value, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ tflite::reference_ops::Reverse<uint8_t>(
+ axis_value, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported output type");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.h b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.h
new file mode 100644
index 000000000..51211c703
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_REVERSE_H
+#define LUCI_INTERPRETER_KERNELS_REVERSE_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ReverseV2 : public Kernel
+{
+public:
+ ReverseV2(const Tensor *input, const Tensor *axes, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *axes() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_REVERSE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp
new file mode 100644
index 000000000..c0025faca
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReverseV2.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T> class ReverseV2Test : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(ReverseV2Test, DataTypes);
+
+TYPED_TEST(ReverseV2Test, MultiDimensions)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ // TypeParam
+ std::vector<TypeParam> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
+ Shape input_shape{4, 3, 2};
+ std::vector<int32_t> axis_data{1};
+ Shape axis_shape{1};
+
+ std::vector<TypeParam> output_data{5, 6, 3, 4, 1, 2, 11, 12, 9, 10, 7, 8,
+ 17, 18, 15, 16, 13, 14, 23, 24, 21, 22, 19, 20};
+ std::vector<int32_t> output_shape{4, 3, 2};
+
+ Tensor input_tensor =
+ makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>(axis_shape, axis_data, memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
+
+ ReverseV2 kernel = ReverseV2(&input_tensor, &axis_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
+ ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.cpp
new file mode 100644
index 000000000..6dd92dc98
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Rsqrt.h"
+#include "kernels/Utils.h"
+
+#include <stdexcept>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Rsqrt::Rsqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Rsqrt::configure()
+{
+ if (input()->element_type() != output()->element_type())
+ {
+ throw std::runtime_error("Input/output tensor data type mismatch.");
+ }
+ output()->resize(input()->shape());
+}
+
+void Rsqrt::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Rsqrt::evalFloat() const
+{
+ auto in = getTensorData<float>(input());
+ auto out = getTensorData<float>(output());
+ auto size = getTensorShape(input()).FlatSize();
+ for (auto i = in; i != in + size; ++i)
+ {
+ *out = 1.f / std::sqrt(*i);
+ ++out;
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.h b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.h
new file mode 100644
index 000000000..adc5bcfa2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RSQRT_H
+#define LUCI_INTERPRETER_KERNELS_RSQRT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Rsqrt : public Kernel
+{
+public:
+ Rsqrt(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RSQRT_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp
new file mode 100644
index 000000000..3c6494232
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Rsqrt.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Rsqrt kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(RsqrtTest, SimpleRsqrt)
+{
+ Check(
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 5, 4, 8, 2, //
+ 6, 7.5, 9, 0.3, //
+ },
+ /*output_data=*/
+ {
+ 0.44721360, 0.5, 0.35355339, 0.70710678, //
+ 0.40824829, 0.36514837, 0.33333333, 1.8257419, //
+ });
+}
+
+TEST(RsqrtTest, Input_Output_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Rsqrt kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(RsqrtTest, Invalid_Input_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Rsqrt kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.cpp
new file mode 100644
index 000000000..40d79aaa3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.cpp
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SVDF.h"
+#include "kernels/Utils.h"
+#include "PALSVDF.h"
+
+#include <tensorflow/lite/kernels/internal/quantization_util.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+TfLiteFusedActivation get_tflite_activation(Activation activation)
+{
+ switch (activation)
+ {
+ case luci::FusedActFunc::RELU:
+ return kTfLiteActRelu;
+ case luci::FusedActFunc::RELU6:
+ return kTfLiteActRelu6;
+ case luci::FusedActFunc::RELU_N1_TO_1:
+ return kTfLiteActReluN1To1;
+ case luci::FusedActFunc::TANH:
+ return kTfLiteActTanh;
+ case luci::FusedActFunc::SIGN_BIT:
+ return kTfLiteActSignBit;
+ case luci::FusedActFunc::NONE:
+ return kTfLiteActNone;
+ default:
+ throw std::runtime_error("Unsupported activation type");
+ }
+}
+} // namespace
+
+SVDF::SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
+ const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
+ Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
+ Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6,
+ const SVDFParams &params)
+ : KernelWithParams<SVDFParams>({input, weight_feature, weight_time, bias, input_activation_state},
+ {output, scratchpad_activation_state, scratchpad_1, scratchpad_2,
+ scratchpad_3, scratchpad_4, scratchpad_5, scratchpad_6},
+ params)
+{
+ // Do nothing
+}
+
+void SVDF::configure()
+{
+ const Shape &input_shape = input()->shape();
+ const Shape &weight_features_shape = weight_feature()->shape();
+ const Shape &weight_time_shape = weight_time()->shape();
+
+ // Validate Input Tensor:
+ LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::FLOAT32 ||
+ input()->element_type() == loco::DataType::S8);
+ LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 2);
+
+ // Validate inputs and output types
+ if (input()->element_type() == loco::DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::S8);
+ LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::S16 ||
+ weight_time()->element_type() == loco::DataType::S8);
+ if (bias())
+ LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::S32);
+
+ LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::S16 ||
+ input_activation_state()->element_type() == loco::DataType::S8);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8);
+
+    // Note: TFLite currently supports only the ReLU activation for integer SVDF
+ LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::RELU);
+ }
+ else if (weight_feature()->element_type() == loco::DataType::FLOAT32)
+ {
+ LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::FLOAT32);
+ if (bias())
+ LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32);
+ }
+ else if ((weight_feature()->element_type() == loco::DataType::U8 ||
+ weight_feature()->element_type() == loco::DataType::S8) &&
+ input()->element_type() == loco::DataType::FLOAT32)
+ {
+    // TODO: support hybrid SVDF op
+ throw std::runtime_error("Hybrid type is not currently supported");
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+
+  // Check that all tensor parameters are consistent with one another and with
+  // the input configuration.
+ const int rank = params().svdf_rank;
+ const int batch_size = input_shape.dim(0);
+ const int num_filters = weight_features_shape.dim(0);
+ LUCI_INTERPRETER_CHECK(rank != 0);
+ LUCI_INTERPRETER_CHECK(num_filters % rank == 0);
+
+ const int num_units = num_filters / rank;
+ const int memory_size = weight_time_shape.dim(1);
+
+ // Validate Weight_Feature Input Tensor:
+ LUCI_INTERPRETER_CHECK(weight_features_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(weight_features_shape.dim(1) == input_shape.dim(1));
+
+ // Validate Weight_Time Input Tensor:
+ LUCI_INTERPRETER_CHECK(weight_time_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(weight_time_shape.dim(0) == num_filters);
+
+ // Validate Bias
+ if (bias())
+ LUCI_INTERPRETER_CHECK(bias()->shape().dim(0) == num_units);
+
+ // Validate Input Activation State
+ LUCI_INTERPRETER_CHECK(input_activation_state()->shape().num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(0) == batch_size);
+ LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(1) == memory_size * num_filters);
+
+  // Resize scratchpad_activation_state to match input_activation_state's shape
+ auto scratchpad_activation_state = getOutputTensors()[1];
+ scratchpad_activation_state->resize({batch_size, memory_size * num_filters});
+
+ // Resize output tensor
+ output()->resize({batch_size, num_units});
+
+ luci_interpreter_pal::SetupScratchpadTensor(
+ input()->element_type(), weight_feature()->element_type(), getOutputTensors()[2],
+ getOutputTensors()[3], getOutputTensors()[4], getOutputTensors()[5], getOutputTensors()[6],
+ getOutputTensors()[7], input_shape, weight_time_shape, batch_size, num_filters, num_units);
+}
+
+void SVDF::execute() const
+{
+ switch (weight_feature()->element_type())
+ {
+ case loco::DataType::FLOAT32:
+ evalFloat();
+ break;
+ case loco::DataType::S8:
+ {
+ if (input()->element_type() == loco::DataType::S8)
+ evalInteger();
+ else
+      // TODO: support hybrid SVDF op
+      throw std::runtime_error("Hybrid type is not currently supported");
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type");
+ }
+}
+
+void SVDF::evalInteger() const
+{
+ const auto effective_scale_1 = static_cast<double>(input()->scale() * weight_feature()->scale() /
+ input_activation_state()->scale());
+ const auto effective_scale_2 = static_cast<double>(input_activation_state()->scale() *
+ weight_time()->scale() / output()->scale());
+
+ int32_t effective_scale_1_a;
+ int effective_scale_1_b;
+ int32_t effective_scale_2_a;
+ int effective_scale_2_b;
+
+ tflite::QuantizeMultiplier(effective_scale_1, &effective_scale_1_a, &effective_scale_1_b);
+ tflite::QuantizeMultiplier(effective_scale_2, &effective_scale_2_a, &effective_scale_2_b);
+
+ TfLiteSVDFParams params_svdf{};
+ params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+ params_svdf.rank = params().svdf_rank;
+ params_svdf.activation = get_tflite_activation(params().activation);
+
+ auto scratchpad_activation_state = getOutputTensors()[1];
+  // Note: the activation_state input variable tensor is expected to be reset to zero,
+  // and this variable tensor is expected to have no allocated buffer
+ auto scratchpad_data = getTensorData<int16_t>(scratchpad_activation_state);
+ std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0);
+
+ auto scratchpad = getOutputTensors()[2];
+ auto output_temp = getOutputTensors()[3];
+
+ int32_t input_zp = input()->zero_point();
+ int32_t output_zp = output()->zero_point();
+ luci_interpreter_pal::IntegerSVDF(
+ params_svdf, getTensorShape(input()), getTensorData<int8_t>(input()),
+ getTensorShape(weight_feature()), getTensorData<int8_t>(weight_feature()),
+ getTensorShape(weight_time()), getTensorData<int16_t>(weight_time()), getTensorShape(bias()),
+ getTensorData<int32_t>(bias()), scratchpad_data, getTensorShape(output()),
+ getTensorData<int8_t>(output()), getTensorData<int32_t>(scratchpad),
+ getTensorData<int32_t>(output_temp), effective_scale_1_a, effective_scale_1_b,
+ effective_scale_2_a, effective_scale_2_b, input_zp, output_zp);
+}
+
+void SVDF::evalFloat() const
+{
+ TfLiteSVDFParams params_svdf{};
+ params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+ params_svdf.rank = params().svdf_rank;
+ params_svdf.activation = get_tflite_activation(params().activation);
+
+ auto scratchpad_activation_state = getOutputTensors()[1];
+  // Note: the activation_state input variable tensor is expected to be reset to zero,
+  // and this variable tensor is expected to have no allocated buffer
+ auto scratchpad_data = getTensorData<float>(scratchpad_activation_state);
+ std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0);
+
+ auto scratchpad_1 = getOutputTensors()[2];
+
+ luci_interpreter_pal::FloatSVDF(
+ params_svdf, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(weight_feature()), getTensorData<float>(weight_feature()),
+ getTensorShape(weight_time()), getTensorData<float>(weight_time()), getTensorShape(bias()),
+ getTensorData<float>(bias()), getTensorData<float>(scratchpad_1), scratchpad_data,
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.h b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.h
new file mode 100644
index 000000000..335a6cd8f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SVDF_H
+#define LUCI_INTERPRETER_KERNELS_SVDF_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SVDF : public KernelWithParams<SVDFParams>
+{
+public:
+ SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
+ const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
+ Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
+ Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6,
+ const SVDFParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *weight_feature() const { return _inputs[1]; }
+ const Tensor *weight_time() const { return _inputs[2]; }
+ const Tensor *bias() const { return _inputs[3]; }
+ const Tensor *input_activation_state() const { return _inputs[4]; }
+
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalInteger() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SVDF_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.test.cpp
new file mode 100644
index 000000000..82bd9b009
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.test.cpp
@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SVDF.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class SVDFTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(SVDFTest, FullIntegerTest)
+{
+ const int32_t batches = 2;
+ const int32_t input_size = 3;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, input_size};
+ Shape weight_feature_shape{num_filters, input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape bias_shape{units};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<float> input_data{0.49837467, 0.19278903, 0.26584083,
+ 0.17660543, 0.52949083, -0.77931279};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ std::vector<float> bias_data{-0.0976817, 0.15294972, 0.39635518, -0.02702999};
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-1, 1);
+ std::pair<float, int32_t> weight_feature_quant_param = quantizationParams<int8_t>(-0.5, 0.5);
+ std::pair<float, int32_t> weight_time_quant_param = quantizationParams<int16_t>(-1, 1);
+ std::pair<float, int32_t> bias_quant_param = quantizationParams<int32_t>(-512, 512);
+ std::pair<float, int32_t> activation_state_quant_param = quantizationParams<int16_t>(-16, 16);
+
+ std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-0.5, 0.5);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::S8>(
+ weight_feature_shape, weight_feature_quant_param.first, weight_feature_quant_param.second,
+ weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor = makeInputTensor<DataType::S16>(
+ weight_time_shape, weight_time_quant_param.first, weight_time_quant_param.second,
+ weight_time_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(
+ bias_shape, bias_quant_param.first, bias_quant_param.second, bias_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(
+ DataType::S16, activation_state_quant_param.first, activation_state_quant_param.second);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor =
+ makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+ Tensor scratchpad_activation_state(DataType::S16, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::S32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::S32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::RELU;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, &bias_tensor,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad_activation_state);
+ _memory_manager->allocate_memory(scratchpad_1);
+ _memory_manager->allocate_memory(scratchpad_2);
+ _memory_manager->allocate_memory(scratchpad_3);
+ _memory_manager->allocate_memory(scratchpad_4);
+ _memory_manager->allocate_memory(scratchpad_5);
+ _memory_manager->allocate_memory(scratchpad_6);
+ kernel.execute();
+
+ std::vector<int8_t> ref_output_data{-9, 24, 31, 1, -10, 10, -3, 0};
+
+ std::vector<int32_t> ref_output_shape{batches, units};
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor), ref_output_data);
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(SVDFTest, FloatTest)
+{
+ const int32_t batches = 2;
+ const int32_t input_size = 3;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, input_size};
+ Shape weight_feature_shape{num_filters, input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<float> input_data{0.12609188, -0.46347019, -0.89598465,
+ 0.35867718, 0.36897406, 0.73463392};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+ weight_feature_shape, weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor =
+ makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::NONE;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad_activation_state);
+ _memory_manager->allocate_memory(scratchpad_1);
+ _memory_manager->allocate_memory(scratchpad_2);
+ _memory_manager->allocate_memory(scratchpad_3);
+ _memory_manager->allocate_memory(scratchpad_4);
+ _memory_manager->allocate_memory(scratchpad_5);
+ _memory_manager->allocate_memory(scratchpad_6);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.014899, -0.0517661, -0.143725, -0.00271883,
+ -0.03004015, 0.09565311, 0.1587342, 0.00784263};
+
+ std::vector<float> ref_output_shape{batches, units};
+ const float tolerance = 1e-5;
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(SVDFTest, Unsupported_Type_Configure_NEG)
+{
+ const int32_t batches = 2;
+ const int32_t input_size = 3;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, input_size};
+ Shape weight_feature_shape{num_filters, input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<int32_t> input_data{0, 1, 3, 4, 4, -2};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+ weight_feature_shape, weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor =
+ makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::NONE;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(SVDFTest, Invalid_Input_Shape_NEG)
+{
+ const int32_t batches = 2;
+ const int32_t right_input_size = 3;
+ const int32_t wrong_input_size = 4;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, wrong_input_size};
+ Shape weight_feature_shape{num_filters, right_input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<float> input_data{0, 1, 3, 2, 4, 4, -2, 1};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+ weight_feature_shape, weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor =
+ makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::NONE;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Shape.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.cpp
new file mode 100644
index 000000000..0429fe1e5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Shape.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ShapeKernel::ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams &params)
+ : KernelWithParams<ShapeParams>({input}, {output}, params)
+{
+}
+
+void ShapeKernel::configure()
+{
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S32 or
+ output()->element_type() == DataType::S64);
+ const auto input_shape = input()->shape();
+
+ Shape output_shape(1);
+ output_shape.dim(0) = input_shape.num_dims();
+
+ output()->resize(output_shape);
+}
+
+void ShapeKernel::execute() const
+{
+ switch (params().out_type)
+ {
+ case DataType::S32:
+ evalInt<int32_t>();
+ break;
+ case DataType::S64:
+ evalInt<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> void ShapeKernel::evalInt() const
+{
+ const auto input_shape = input()->shape();
+
+ auto output_data = getTensorData<T>(output());
+
+ for (int i = 0; i < input_shape.num_dims(); ++i)
+ {
+ output_data[i] = input_shape.dim(i);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Shape.h b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.h
new file mode 100644
index 000000000..cfaadec91
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SHAPE_H
+#define LUCI_INTERPRETER_KERNELS_SHAPE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ShapeKernel : public KernelWithParams<ShapeParams>
+{
+public:
+ ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void evalInt() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SHAPE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Shape.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.test.cpp
new file mode 100644
index 000000000..4763e016c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.test.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Shape.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ShapeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Helper: runs ShapeKernel on a fixed FLOAT32 input of shape {1, 3, 1, 3, 5}
+// with output type 'dataType' (element type T must match it), then checks
+// that the output holds the input dimensions and itself has shape {5}.
+template <typename T> void runShapeKernel(loco::DataType dataType, IMemoryManager *memory_manager)
+{
+  Shape input_shape{1, 3, 1, 3, 5};
+
+  // Input data is never read by SHAPE; only the shape matters, so the
+  // tensor is created without allocating/filling a buffer.
+  Tensor input_tensor = Tensor(loco::DataType::FLOAT32, input_shape, {}, "");
+  Tensor output_tensor = makeOutputTensor(dataType);
+
+  ShapeParams params{};
+  params.out_type = dataType;
+
+  ShapeKernel kernel(&input_tensor, &output_tensor, params);
+
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<T> ref_output_data{1, 3, 1, 3, 5};
+  EXPECT_THAT(extractTensorData<T>(output_tensor), ref_output_data);
+
+  std::vector<int32_t> ref_output_shape{5};
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Positive test: SHAPE supports both supported integer output types.
+TEST_F(ShapeTest, OutTypeInt)
+{
+
+  // Run for int32_t output
+  runShapeKernel<int32_t>(loco::DataType::S32, _memory_manager.get());
+  // Run for int64_t output
+  runShapeKernel<int64_t>(loco::DataType::S64, _memory_manager.get());
+
+  SUCCEED();
+}
+
+// Negative test: a FLOAT32 output type must be rejected at configure time.
+TEST_F(ShapeTest, Invalid_Output_Type_NEG)
+{
+  Shape input_shape{1, 3};
+
+  Tensor input_tensor = Tensor(loco::DataType::FLOAT32, input_shape, {}, "");
+  Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+  ShapeParams params{};
+  params.out_type = loco::DataType::FLOAT32;
+
+  ShapeKernel kernel(&input_tensor, &output_tensor, params);
+
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Slice.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.cpp
new file mode 100644
index 000000000..2fe2c5471
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.cpp
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Slice.h"
+#include "Utils.h"
+#include "PALSlice.h"
+
+#include <cassert>
+#include <cstring>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+const int max_dim = 4;
+
+// SLICE kernel: inputs are (data, begin indices, slice sizes), one output.
+Slice::Slice(const Tensor *input, const Tensor *begin, const Tensor *size, Tensor *output)
+  : Kernel({input, begin, size}, {output})
+{
+}
+
+// Computes the output shape of SLICE from the 'begin' and 'size' tensors.
+// T is the index element type (int32_t or int64_t). A size of -1 means
+// "slice to the end of the dimension"; any other negative size throws.
+// For non-negative sizes, begin + size must not exceed the input dimension.
+// NOTE(review): the -1 branch does not validate begin[idx] itself, so an
+// out-of-range begin would yield a negative dim here — confirm upstream
+// validation covers that case.
+template <typename T>
+Shape calculateOutputShape(const Tensor *input, const Tensor *begin, const Tensor *size)
+{
+  Shape output_shape = Shape(input->shape().num_dims());
+  for (int idx = 0; idx < input->shape().num_dims(); idx++)
+  {
+    T size_value = getTensorData<T>(size)[idx];
+    if (size_value < 0)
+    {
+      if (size_value != -1)
+      {
+        throw std::runtime_error("Invalid size.");
+      }
+      // -1: take everything from begin to the end of this dimension.
+      size_value = input->shape().dim(idx) - getTensorData<T>(begin)[idx];
+    }
+    else
+    {
+      if (input->shape().dim(idx) < getTensorData<T>(begin)[idx] + size_value)
+      {
+        throw std::runtime_error("Invalid begin and size.");
+      }
+    }
+    output_shape.dim(idx) = static_cast<int>(size_value);
+  }
+  return output_shape;
+}
+
+// Appends begin/size values to the output vectors in *reverse* dimension
+// order (innermost dimension first). Slice::execute() re-reverses them
+// (begins[3 - i]) when filling tflite::SliceParams, so the two must stay
+// in sync.
+template <typename T>
+void getBeginAndSizeVectors(int dimensions, const Tensor *begin, const Tensor *size,
+                            std::vector<int> *begins, std::vector<int> *sizes)
+{
+  for (int idx = dimensions - 1; idx >= 0; --idx)
+  {
+    begins->push_back(getTensorData<T>(begin)[idx]);
+    sizes->push_back(getTensorData<T>(size)[idx]);
+  }
+}
+
+// Validates operand types/ranks (begin/size are 1-D S32 or S64, input has at
+// most max_dim (4) dimensions) and resizes the output to the computed slice
+// shape.
+void Slice::configure()
+{
+  assert(input()->element_type() == output()->element_type());
+  assert(begin()->element_type() == DataType::S32 || begin()->element_type() == DataType::S64);
+  assert(size()->element_type() == DataType::S32 || size()->element_type() == DataType::S64);
+  assert(begin()->shape().num_dims() == 1);
+  assert(size()->shape().num_dims() == 1);
+  assert(input()->shape().num_dims() <= max_dim);
+
+  // Dispatch on the index element type of 'begin' (assumed to match 'size').
+  if (begin()->element_type() == DataType::S32)
+  {
+    output()->resize(calculateOutputShape<int32_t>(input(), begin(), size()));
+  }
+  else if (begin()->element_type() == DataType::S64)
+  {
+    output()->resize(calculateOutputShape<int64_t>(input(), begin(), size()));
+  }
+  else
+  {
+    throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Runs the slice: gathers begin/size as ints (innermost-first), pads the
+// missing dimensions up to 4-D with no-op slices, then dispatches to the PAL
+// implementation for the supported element types.
+void Slice::execute() const
+{
+  std::vector<int> begins;
+  begins.reserve(max_dim);
+  std::vector<int> sizes;
+  sizes.reserve(max_dim);
+  if (begin()->element_type() == DataType::S32)
+  {
+    getBeginAndSizeVectors<int32_t>(input()->shape().num_dims(), begin(), size(), &begins, &sizes);
+  }
+  else if (begin()->element_type() == DataType::S64)
+  {
+    getBeginAndSizeVectors<int64_t>(input()->shape().num_dims(), begin(), size(), &begins, &sizes);
+  }
+  else
+  {
+    throw std::runtime_error("Unsupported begin type.");
+  }
+  // Pad to exactly max_dim entries: begin 0 / size 1 leaves padded dims
+  // untouched.
+  for (int i = input()->shape().num_dims(); i < max_dim; ++i)
+  {
+    begins.push_back(0);
+    sizes.push_back(1);
+  }
+
+  assert(begins.size() == 4);
+  assert(sizes.size() == 4);
+  tflite::SliceParams op_params{};
+  op_params.begin_count = 4;
+  op_params.size_count = 4;
+  for (int i = 0; i < 4; i++)
+  {
+    // Vectors were filled innermost-first; re-reverse into the natural
+    // outermost-first order expected by tflite::SliceParams.
+    op_params.begin[i] = begins[3 - i];
+    op_params.size[i] = sizes[3 - i];
+  }
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      luci_interpreter_pal::Slice(op_params, getTensorShape(input()), getTensorData<float>(input()),
+                                  getTensorShape(output()), getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      luci_interpreter_pal::Slice(op_params, getTensorShape(input()),
+                                  getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                  getTensorData<uint8_t>(output()));
+      break;
+    case DataType::S8:
+      luci_interpreter_pal::Slice(op_params, getTensorShape(input()),
+                                  getTensorData<int8_t>(input()), getTensorShape(output()),
+                                  getTensorData<int8_t>(output()));
+      break;
+    default:
+      throw std::runtime_error("Unsupported input type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Slice.h b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.h
new file mode 100644
index 000000000..23c359608
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SLICE_H
+#define LUCI_INTERPRETER_KERNELS_SLICE_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel for the SLICE operator: extracts a contiguous sub-tensor described
+// by per-dimension 'begin' indices and 'size' extents (-1 size = to the end).
+class Slice : public Kernel
+{
+public:
+  Slice(const Tensor *input, const Tensor *begin, const Tensor *size, Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *begin() const { return _inputs[1]; }
+  const Tensor *size() const { return _inputs[2]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SLICE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Slice.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.test.cpp
new file mode 100644
index 000000000..517982990
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.test.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Slice.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Typed fixture: the same slice scenario is run once per element type below.
+template <typename T> class SliceTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
+TYPED_TEST_SUITE(SliceTest, DataTypes);
+
+// Slices a {3, 2, 3, 1} tensor starting at batch 1, taking 2 batches, 1 row,
+// all columns (-1) and 1 channel; verifies data and the {2, 1, 3, 1} shape.
+TYPED_TEST(SliceTest, SimpleTest)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  std::vector<TypeParam> input_data{1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6};
+  Shape input_shape{3, 2, 3, 1};
+  std::vector<int32_t> begin_data{1, 0, 0, 0};
+  Shape begin_shape{4};
+  std::vector<int32_t> size_data{2, 1, -1, 1};
+  Shape size_shape{4};
+  std::vector<TypeParam> output_data{3, 3, 3, 5, 5, 5};
+  std::vector<int32_t> output_shape{2, 1, 3, 1};
+
+  Tensor input_tensor =
+    makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get());
+  Tensor begin_tensor =
+    makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
+
+  Slice kernel(&input_tensor, &begin_tensor, &size_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
+              ::testing::ElementsAreArray(output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.cpp
new file mode 100644
index 000000000..c230aaa70
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Softmax.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/softmax.h>
+#include "PALSoftmax.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// SOFTMAX kernel: single input, single output; beta comes from params.
+Softmax::Softmax(const Tensor *input, Tensor *output, const SoftmaxParams &params)
+  : KernelWithParams<SoftmaxParams>({input}, {output}, params)
+{
+}
+
+// Validates types and quantization constraints, precomputes the quantized
+// lookup table, and sizes the output like the input.
+// Quantized constraints: U8 output must have zero_point 0; S8 output must
+// have zero_point == numeric_limits<int8_t>::min() (the two checks below
+// each short-circuit for the other type).
+void Softmax::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= 1);
+  if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S8)
+  {
+    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8 || output()->zero_point() == 0);
+    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::U8 ||
+                           output()->zero_point() == std::numeric_limits<int8_t>::min());
+    // op_params.table aliases the member _table, so the PAL call fills the
+    // member in place; evalQuantized() reuses it at execute time.
+    tflite::SoftmaxParams op_params{};
+    op_params.table = _table;
+    luci_interpreter_pal::PopulateSoftmaxLookupTable(&op_params, input()->scale(), params().beta);
+  }
+  output()->resize(input()->shape());
+}
+
+// Dispatches to the float or quantized evaluation path by element type.
+void Softmax::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::S8:
+      evalQuantized<int8_t>();
+      break;
+    case DataType::U8:
+      evalQuantized<uint8_t>();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Float path: delegates to the TFLite reference softmax with params().beta.
+void Softmax::evalFloat() const
+{
+  tflite::SoftmaxParams op_params{};
+  op_params.beta = params().beta;
+
+  tflite::reference_ops::Softmax(op_params, getTensorShape(input()), getTensorData<float>(input()),
+                                 getTensorShape(output()), getTensorData<float>(output()));
+}
+
+// Quantized path (T = int8_t or uint8_t): reuses the lookup table built in
+// configure() (const_cast only strips constness of this const method's view
+// of the member; the PAL is not expected to write through it).
+template <typename T> void Softmax::evalQuantized() const
+{
+  tflite::SoftmaxParams op_params{};
+  op_params.table = const_cast<float *>(_table);
+  op_params.zero_point = output()->zero_point();
+  op_params.scale = output()->scale();
+  luci_interpreter_pal::InitializeParams(&op_params, input()->scale(), params().beta);
+  luci_interpreter_pal::Softmax(op_params, getTensorShape(input()), getTensorData<T>(input()),
+                                getTensorShape(output()), getTensorData<T>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.h b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.h
new file mode 100644
index 000000000..1f281df1c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SOFTMAX_H
+#define LUCI_INTERPRETER_KERNELS_SOFTMAX_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel for the SOFTMAX operator. Supports FLOAT32 plus quantized U8/S8,
+// for which a 256-entry lookup table (_table) is precomputed in configure().
+class Softmax : public KernelWithParams<SoftmaxParams>
+{
+public:
+  Softmax(const Tensor *input, Tensor *output, const SoftmaxParams &params);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+  template <typename T> void evalQuantized() const;
+
+  // Lookup table filled by configure() for the quantized path (one entry
+  // per possible 8-bit input value).
+  float _table[256];
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SOFTMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.test.cpp
new file mode 100644
index 000000000..08e70672d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.test.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Softmax.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Maps a C++ element type to the corresponding loco::DataType at compile
+// time; only the three specializations below are defined.
+template <typename T> constexpr loco::DataType toLocoDataType();
+
+template <> constexpr loco::DataType toLocoDataType<float>() { return loco::DataType::FLOAT32; }
+
+template <> constexpr loco::DataType toLocoDataType<uint8_t>() { return loco::DataType::U8; }
+
+template <> constexpr loco::DataType toLocoDataType<int8_t>() { return loco::DataType::S8; }
+
+// Floating-point overload (selected by SFINAE): runs Softmax on float data
+// and compares against the reference output with a float tolerance.
+template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<toLocoDataType<T>()>(input_shape, input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(toLocoDataType<T>());
+
+  SoftmaxParams params{};
+  params.beta = 0.1;
+
+  Softmax kernel(&input_tensor, &output_tensor, params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+// Integral overload (selected by SFINAE): quantizes input/output ranges
+// (widened to include 0 so the zero point is representable), runs the
+// quantized kernel, and compares the dequantized result within one scale.
+template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  std::pair<float, int32_t> input_quant_param =
+    quantizationParams<T>(std::min<float>(std::min<float>(input_data), 0.f),
+                          std::max<float>(std::max<float>(input_data), 0.f));
+  std::pair<float, int32_t> output_quant_param =
+    quantizationParams<T>(std::min<float>(std::min<float>(output_data), 0.f),
+                          std::max<float>(std::max<float>(output_data), 0.f));
+  Tensor input_tensor = makeInputTensor<toLocoDataType<T>()>(input_shape, input_quant_param.first,
+                                                             input_quant_param.second, input_data,
+                                                             memory_manager.get());
+  Tensor output_tensor =
+    makeOutputTensor(toLocoDataType<T>(), output_quant_param.first, output_quant_param.second);
+
+  SoftmaxParams params{};
+  params.beta = 0.1;
+
+  Softmax kernel(&input_tensor, &output_tensor, params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(output_data, output_tensor.scale()));
+}
+
+// Typed fixture: Simple below runs once per element type (float + quantized).
+template <typename T> class SoftmaxTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
+TYPED_TEST_SUITE(SoftmaxTest, DataTypes);
+
+// Checks softmax (beta = 0.1) over the last axis of a {2, 1, 2, 3} tensor;
+// each row of 3 expected values sums to ~1.
+TYPED_TEST(SoftmaxTest, Simple)
+{
+  Check<TypeParam>({2, 1, 2, 3}, {2, 1, 2, 3},
+                   {
+                     5, -9, 8,  //
+                     -7, 2, -4, //
+                     1, -2, 9,  //
+                     3, -6, -1, //
+                   },
+                   {
+                     0.38514, 0.09497, 0.51989, //
+                     0.20792, 0.51141, 0.28067, //
+                     0.25212, 0.18678, 0.56110, //
+                     0.48149, 0.19576, 0.32275, //
+                   });
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp
new file mode 100644
index 000000000..630cd38c4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SpaceToBatchND.h"
+#include "kernels/Utils.h"
+
+#include "PALSpaceToBatchND.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+// SPACE_TO_BATCH_ND accepts only 3-D or 4-D inputs.
+const int kInputMinDimensionNum = 3;
+const int kInputMaxDimensionNum = 4;
+
+} // namespace
+
+// SPACE_TO_BATCH_ND kernel: inputs are (data, block_shape, paddings).
+SpaceToBatchND::SpaceToBatchND(const Tensor *input, const Tensor *block_shape,
+                               const Tensor *paddings, Tensor *output)
+  : Kernel({input, block_shape, paddings}, {output})
+{
+}
+
+// Validates operand shapes and computes the output shape:
+// each padded spatial dimension must divide evenly by its block size; the
+// batch dimension grows by the product of block sizes; the channel (last)
+// dimension is unchanged.
+void SpaceToBatchND::configure()
+{
+  const auto *block_shape_data = block_shape()->data<int32_t>();
+  const auto *paddings_data = paddings()->data<int32_t>();
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum);
+  LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum);
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+  // Spatial dims exclude the leading batch and trailing channel dimensions.
+  int spatial_dims_num = input()->shape().num_dims() - 2;
+
+  LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
+  LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);
+
+  // paddings is {spatial_dims_num, 2}: [before, after] per spatial dim.
+  LUCI_INTERPRETER_CHECK(paddings()->shape().num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(paddings()->shape().dim(0) == spatial_dims_num);
+  LUCI_INTERPRETER_CHECK(paddings()->shape().dim(1) == 2);
+
+  Shape output_shape = Shape(input()->shape().num_dims());
+  int output_batch_size = input()->shape().dim(0);
+  for (int i = 0; i < spatial_dims_num; ++i)
+  {
+    int final_dim_size =
+      (input()->shape().dim(i + 1) + paddings_data[i * 2] + paddings_data[i * 2 + 1]);
+    LUCI_INTERPRETER_CHECK(final_dim_size % block_shape_data[i] == 0);
+    output_shape.dim(i + 1) = final_dim_size / block_shape_data[i];
+    output_batch_size = output_batch_size * block_shape_data[i];
+  }
+  output_shape.dim(0) = output_batch_size;
+  output_shape.dim(input()->shape().num_dims() - 1) =
+    input()->shape().dim(input()->shape().num_dims() - 1);
+  output()->resize(output_shape);
+}
+
+// Dispatches to the PAL SpaceToBatchND implementation for FLOAT32 and U8.
+// For quantized U8 data the padded regions are filled at the output's zero
+// point instead of literal 0.
+void SpaceToBatchND::execute() const
+{
+  // Shared by both branches; only output_offset differs. Declared before
+  // the switch: a statement placed between 'switch {' and the first 'case'
+  // label is never executed, so the original in-switch declaration only
+  // worked because its initialization is vacuous.
+  tflite::SpaceToBatchParams op_params;
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      op_params.output_offset = 0;
+      luci_interpreter_pal::SpaceToBatchND(
+        op_params, getTensorShape(input()), getTensorData<float>(input()),
+        getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
+        getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
+        getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      op_params.output_offset = output()->zero_point();
+      luci_interpreter_pal::SpaceToBatchND(
+        op_params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+        getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
+        getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
+        getTensorData<uint8_t>(output()));
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.h b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.h
new file mode 100644
index 000000000..0893003bb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel for the SPACE_TO_BATCH_ND operator: pads the spatial dimensions,
+// then rearranges spatial blocks into the batch dimension.
+class SpaceToBatchND : public Kernel
+{
+public:
+  SpaceToBatchND(const Tensor *input, const Tensor *block_shape, const Tensor *paddings,
+                 Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *block_shape() const { return _inputs[1]; }
+  const Tensor *paddings() const { return _inputs[2]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp
new file mode 100644
index 000000000..3a8b0a812
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SpaceToBatchND.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Generic helper: runs SpaceToBatchND on data of element type T and checks
+// the exact output values and shape. Specialized below for uint8_t.
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape,
+           std::initializer_list<int32_t> block_shape_shape,
+           std::initializer_list<int32_t> paddings_shape,
+           std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data,
+           std::initializer_list<int32_t> block_shape_data,
+           std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  constexpr DataType element_type = getElementType<T>();
+  Tensor input_tensor =
+    makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+  Tensor block_shape_tensor =
+    makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>(paddings_shape, paddings_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(element_type);
+
+  SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+// uint8_t specialization: quantizes the input range, runs the quantized
+// kernel with identical input/output quantization, and compares the
+// dequantized result within one scale unit.
+template <>
+void Check<uint8_t>(
+  std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> block_shape_shape,
+  std::initializer_list<int32_t> paddings_shape, std::initializer_list<int32_t> output_shape,
+  std::initializer_list<float> input_data, std::initializer_list<int32_t> block_shape_data,
+  std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  std::pair<float, int32_t> input_quant_param =
+    quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+                                  input_data, memory_manager.get());
+  Tensor block_shape_tensor =
+    makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>(paddings_shape, paddings_data, memory_manager.get());
+  Tensor output_tensor =
+    makeOutputTensor(DataType::U8, input_quant_param.first, input_quant_param.second);
+
+  SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(output_data, output_tensor.scale()));
+  EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+// Typed fixture: Simple below runs for float and quantized uint8_t.
+template <typename T> class SpaceToBatchNDTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(SpaceToBatchNDTest, DataTypes);
+
+// {1, 5, 2, 1} input with blocks {3, 2} and paddings {1, 0, 2, 0} yields a
+// {6, 2, 2, 1} output; padded positions are zero.
+TYPED_TEST(SpaceToBatchNDTest, Simple)
+{
+  Check<TypeParam>(/*input_shape=*/{1, 5, 2, 1}, /*block_shape_shape=*/{2},
+                   /*paddings_shape=*/{2, 2},
+                   /*output_shape=*/{6, 2, 2, 1},
+                   /*input_data=*/{-1.0, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9, 1.0},
+                   /*block_shape_data=*/{3, 2}, /*paddings_data=*/{1, 0, 2, 0},
+                   /*output_data=*/{0, 0, 0, -0.5, 0, 0, 0, 0.6, 0, -1.0, 0, -0.7,
+                                    0, 0.2, 0, 0.8, 0, -0.3, 0, -0.9, 0, 0.4, 0, 1.0});
+}
+
+// Negative test: 3x3 spatial dims with block 2x2 and no padding are not
+// evenly divisible, so configure() must throw.
+TEST(SpaceToBatchNDTest, Invalid_Shape_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+    {1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, memory_manager.get());
+  Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0}, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp
new file mode 100644
index 000000000..7c29e8cb0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SpaceToDepth.h"
+#include "Utils.h"
+#include "PALSpaceToDepth.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+SpaceToDepth::SpaceToDepth(const Tensor *input, Tensor *output, const SpaceToDepthParams &params)
+ : KernelWithParams<SpaceToDepthParams>({input}, {output}, params)
+{
+}
+
+void SpaceToDepth::configure()
+{
+ assert(input()->shape().num_dims() == 4);
+ assert(output()->element_type() == DataType::FLOAT32 ||
+ output()->element_type() == DataType::U8 || output()->element_type() == DataType::S8 ||
+ output()->element_type() == DataType::S32 || output()->element_type() == DataType::S64);
+ assert(input()->element_type() == output()->element_type());
+
+ const int block_size = params().block_size;
+ const int32_t input_height = input()->shape().dim(1);
+ const int32_t input_width = input()->shape().dim(2);
+ int32_t output_height = input_height / block_size;
+ int32_t output_width = input_width / block_size;
+
+ assert(input_height == output_height * block_size);
+ assert(input_width == output_width * block_size);
+
+ Shape output_shape(4);
+ output_shape.dim(0) = input()->shape().dim(0);
+ output_shape.dim(1) = output_height;
+ output_shape.dim(2) = output_width;
+ output_shape.dim(3) = input()->shape().dim(3) * block_size * block_size;
+
+ output()->resize(output_shape);
+}
+
+void SpaceToDepth::execute() const
+{
+ tflite::SpaceToDepthParams op_params{};
+ op_params.block_size = params().block_size;
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.h b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.h
new file mode 100644
index 000000000..e66316b11
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPACETODEPTH_H
+#define LUCI_INTERPRETER_KERNELS_SPACETODEPTH_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SpaceToDepth : public KernelWithParams<SpaceToDepthParams>
+{
+public:
+ SpaceToDepth(const Tensor *input, Tensor *output, const SpaceToDepthParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPACETODEPTH_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
new file mode 100644
index 000000000..4af488618
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SpaceToDepth.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T> class SpaceToDepthTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(SpaceToDepthTest, DataTypes);
+
+TYPED_TEST(SpaceToDepthTest, SimpleCase)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr DataType element_type = getElementType<TypeParam>();
+ std::vector<TypeParam> input_data{1, 5, 6, 7, 2, 3, 4, 8};
+ Shape input_shape{1, 2, 2, 2};
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ std::vector<TypeParam> output_data{1, 5, 6, 7, 2, 3, 4, 8};
+ std::vector<int32_t> output_shape{1, 1, 1, 8};
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ SpaceToDepthParams params{};
+ params.block_size = 2;
+
+ SpaceToDepth kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
+ ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Split.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Split.cpp
new file mode 100644
index 000000000..1a563f307
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Split.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Split.h"
+
+#include "Utils.h"
+
+#include "PALSplit.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Split::Split(const Tensor *axis, const Tensor *input, std::vector<Tensor *> outputs)
+ : Kernel({axis, input}, std::move(outputs))
+{
+}
+
+void Split::configure()
+{
+ assert(axis()->shape().num_elements() == 1);
+ _axis_value = getTensorData<int32_t>(axis())[0];
+ if (_axis_value < 0)
+ _axis_value += input()->shape().num_dims();
+ assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims());
+
+ const int32_t input_size = input()->shape().dim(_axis_value);
+ assert(input_size % _outputs.size() == 0);
+ const int32_t slice_size = input_size / _outputs.size();
+
+ Shape output_shape = input()->shape();
+ output_shape.dim(_axis_value) = slice_size;
+ for (Tensor *output : _outputs)
+ {
+ output->resize(output_shape);
+ }
+}
+
+void Split::execute() const
+{
+ tflite::SplitParams params{};
+ params.num_split = _outputs.size();
+ params.axis = _axis_value;
+
+#define TF_LITE_SPLIT(scalar) \
+ { \
+ VectorOfTensors<scalar, false> all_outputs(_outputs); \
+ luci_interpreter_pal::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \
+ all_outputs.shapes(), all_outputs.data()); \
+ }
+
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ TF_LITE_SPLIT(float);
+ break;
+ case DataType::U8:
+ TF_LITE_SPLIT(uint8_t);
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+#undef TF_LITE_SPLIT
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Split.h b/compiler/luci-micro/luci-interpreter/src/kernels/Split.h
new file mode 100644
index 000000000..9542b1e56
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Split.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPLIT_H
+#define LUCI_INTERPRETER_KERNELS_SPLIT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Split : public Kernel
+{
+public:
+ Split(const Tensor *axis, const Tensor *input, std::vector<Tensor *> outputs);
+
+ const Tensor *axis() const { return _inputs[0]; }
+ const Tensor *input() const { return _inputs[1]; }
+ Tensor *output(int index) const { return _outputs[index]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ int32_t _axis_value{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPLIT_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Split.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Split.test.cpp
new file mode 100644
index 000000000..283cd9aa9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Split.test.cpp
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Split.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(int axis, int num_splits, std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data,
+ std::vector<std::vector<T>> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr DataType element_type = getElementType<T>();
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis}, memory_manager.get());
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+
+ std::vector<Tensor> output_tensors;
+ output_tensors.reserve(num_splits);
+ for (int i = 0; i < num_splits; ++i)
+ {
+ output_tensors.emplace_back(makeOutputTensor(element_type));
+ }
+
+ std::vector<Tensor *> output_tensor_ptrs(num_splits);
+ for (int i = 0; i < num_splits; ++i)
+ {
+ output_tensor_ptrs[i] = &output_tensors[i];
+ }
+
+ Split kernel(&axis_tensor, &input_tensor, std::move(output_tensor_ptrs));
+ kernel.configure();
+ for (int i = 0; i < num_splits; ++i)
+ {
+ memory_manager->allocate_memory(output_tensors[i]);
+ }
+ kernel.execute();
+
+ for (int i = 0; i < num_splits; ++i)
+ {
+ EXPECT_THAT(extractTensorData<T>(output_tensors[i]),
+ ::testing::ElementsAreArray(output_data[i]));
+ }
+}
+
+template <typename T> class SplitTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(SplitTest, DataTypes);
+
+TYPED_TEST(SplitTest, FourDimensional)
+{
+ Check<TypeParam>(/*axis=*/0, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 2, 3, 4, 5, 6, 7, 8}, //
+ {9, 10, 11, 12, 13, 14, 15, 16}, //
+ });
+ Check<TypeParam>(
+ /*axis=*/1, /*num_splits=*/2, {2, 2, 2, 2}, {2, 1, 2, 2},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 2, 3, 4, 9, 10, 11, 12}, //
+ {5, 6, 7, 8, 13, 14, 15, 16}, //
+ });
+ Check<TypeParam>(
+ /*axis=*/2, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 1, 2},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 2, 5, 6, 9, 10, 13, 14}, //
+ {3, 4, 7, 8, 11, 12, 15, 16}, //
+ });
+ Check<TypeParam>(
+ /*axis=*/3, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 2, 1},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 3, 5, 7, 9, 11, 13, 15}, //
+ {2, 4, 6, 8, 10, 12, 14, 16}, //
+ });
+}
+
+TYPED_TEST(SplitTest, OneDimensional)
+{
+ Check<TypeParam>(
+ /*axis=*/0, /*num_splits=*/8, {8}, {1}, {1, 2, 3, 4, 5, 6, 7, 8},
+ {{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}});
+}
+
+TYPED_TEST(SplitTest, NegativeAxis)
+{
+ Check<TypeParam>(
+ /*axis=*/-4, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 2, 3, 4, 5, 6, 7, 8}, //
+ {9, 10, 11, 12, 13, 14, 15, 16},
+ });
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.cpp
new file mode 100644
index 000000000..aa6820889
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SplitV.h"
+
+#include "Utils.h"
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+SplitV::SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis,
+ std::vector<Tensor *> outputs)
+ : Kernel({input, size_splits, axis}, std::move(outputs))
+{
+}
+
+void SplitV::configure()
+{
+  assert(axis()->shape().num_elements() == 1);
+  _axis_value = getTensorData<int32_t>(axis())[0];
+  if (_axis_value < 0)
+    _axis_value += input()->shape().num_dims();
+  assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims());
+
+  auto num_split = static_cast<int32_t>(_outputs.size());
+  auto sizes_data = getTensorData<int32_t>(size_splits());
+
+  assert(size_splits()->shape().num_dims() == 1);
+
+  int32_t sum = 0;
+  const auto num_size_splits = size_splits()->shape().dim(0);
+  int32_t count_neg_dim = 0;
+
+  // Sum ALL explicit sizes; a single -1 entry (wherever it is) is the wildcard.
+  for (int32_t i = 0; i < num_size_splits; ++i)
+  {
+    if (sizes_data[i] != -1)
+    {
+      sum += sizes_data[i];
+    }
+    else
+    {
+      count_neg_dim++;
+    }
+  }
+  assert(count_neg_dim < 2); // at most one -1 wildcard is allowed
+  assert(size_splits()->shape().num_elements() == num_split);
+
+  auto output_shape = input()->shape();
+  for (int32_t i = 0; i < num_split; ++i)
+  {
+    if (sizes_data[i] == -1)
+    {
+      output_shape.dim(_axis_value) = input()->shape().dim(_axis_value) - sum;
+    }
+    else
+    {
+      output_shape.dim(_axis_value) = sizes_data[i];
+    }
+    _outputs[i]->resize(output_shape);
+  }
+}
+
+void SplitV::execute() const
+{
+ tflite::SplitParams params{};
+ params.num_split = _outputs.size();
+ params.axis = _axis_value;
+
+#define TF_LITE_SPLIT(scalar) \
+ { \
+ VectorOfTensors<scalar, false> all_outputs(_outputs); \
+ tflite::optimized_ops::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \
+ all_outputs.shapes(), all_outputs.data()); \
+ }
+
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ TF_LITE_SPLIT(float);
+ break;
+ case DataType::U8:
+ TF_LITE_SPLIT(uint8_t);
+ break;
+ case DataType::S16:
+ TF_LITE_SPLIT(int16_t);
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+#undef TF_LITE_SPLIT
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.h b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.h
new file mode 100644
index 000000000..92f6288fb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPLIT_V_H
+#define LUCI_INTERPRETER_KERNELS_SPLIT_V_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SplitV : public Kernel
+{
+public:
+ SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis,
+ std::vector<Tensor *> outputs);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *size_splits() const { return _inputs[1]; }
+ const Tensor *axis() const { return _inputs[2]; }
+ Tensor *output(int index) const { return _outputs[index]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ int32_t _axis_value{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPLIT_V_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.test.cpp
new file mode 100644
index 000000000..035bc2122
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.test.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SplitV.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(int axis, std::initializer_list<int32_t> splits_size,
+ std::initializer_list<int32_t> input_shape, std::initializer_list<T> input_data,
+ std::vector<std::vector<T>> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+
+ auto num_splits = static_cast<int32_t>(splits_size.size());
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor sizes_tensor =
+ makeInputTensor<DataType::S32>({num_splits}, splits_size, memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis}, memory_manager.get());
+
+ std::vector<Tensor> output_tensors;
+ output_tensors.reserve(num_splits);
+ for (int i = 0; i < num_splits; ++i)
+ {
+ output_tensors.emplace_back(makeOutputTensor(element_type));
+ }
+
+ std::vector<Tensor *> output_tensor_ptrs(num_splits);
+ for (int i = 0; i < num_splits; ++i)
+ {
+ output_tensor_ptrs[i] = &output_tensors[i];
+ }
+
+ SplitV kernel(&input_tensor, &sizes_tensor, &axis_tensor, std::move(output_tensor_ptrs));
+ kernel.configure();
+ for (int i = 0; i < num_splits; ++i)
+ {
+ memory_manager->allocate_memory(output_tensors[i]);
+ }
+ kernel.execute();
+
+  for (int i = 0; i < num_splits; ++i)
+  {
+    // Verify each split output against its expected slice.
+    EXPECT_THAT(extractTensorData<T>(output_tensors[i]),
+                ::testing::ElementsAreArray(output_data[i]));
+  }
+}
+
+template <typename T> class SplitVTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int16_t>;
+TYPED_TEST_SUITE(SplitVTest, DataTypes);
+
+TYPED_TEST(SplitVTest, ThreeDimensional)
+{
+ Check<TypeParam>(
+ /*axis=*/0, /*splits_size=*/{1, 2}, {3, 3, 3},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
+ {
+ {1, 2, 3, 4, 5, 6, 7, 8, 9}, //
+ {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27} //
+ });
+ Check<TypeParam>(
+ /*axis=*/1, /*splits_size=*/{1, 2}, {3, 3, 3},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
+ {
+ {1, 2, 3, 10, 11, 12, 19, 20, 21}, //
+ {4, 5, 6, 7, 8, 9, 13, 14, 15, 16, 17, 18, 22, 23, 24, 25, 26, 27} //
+ });
+ Check<TypeParam>(
+ /*axis=*/2, /*splits_size=*/{1, 2}, {3, 3, 3},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
+ {
+ {1, 4, 7, 10, 13, 16, 19, 22, 25}, //
+ {2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23, 24, 26, 27} //
+ });
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.cpp
new file mode 100644
index 000000000..46e9fc9ad
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sqrt.h"
+#include "kernels/Utils.h"
+
+#include <stdexcept>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Sqrt::Sqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Sqrt::configure()
+{
+ if (input()->element_type() != output()->element_type())
+ {
+ throw std::runtime_error("Input/output tensor data type mismatch.");
+ }
+ output()->resize(input()->shape());
+}
+
+void Sqrt::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Sqrt::evalFloat() const
+{
+ auto in = getTensorData<float>(input());
+ auto out = getTensorData<float>(output());
+ auto size = getTensorShape(input()).FlatSize();
+ for (auto i = in; i != in + size; ++i)
+ {
+ *out = std::sqrt(*i);
+ ++out;
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.h b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.h
new file mode 100644
index 000000000..4034655ed
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQRT_H
+#define LUCI_INTERPRETER_KERNELS_SQRT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Sqrt : public Kernel
+{
+public:
+ Sqrt(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQRT_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.test.cpp
new file mode 100644
index 000000000..96835fbfc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sqrt.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Sqrt kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(SqrtTest, SimpleSqrt)
+{
+ Check(
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 0, 8, 2, 4, //
+ 3, 7, 10, 0.3, //
+ },
+ /*output_data=*/
+ {
+ 0.0, 2.8284271, 1.4142136, 2, //
+ 1.7320508, 2.6457513, 3.1622777, 0.54772256, //
+ });
+}
+
+TEST(SqrtTest, Input_Output_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Sqrt kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(SqrtTest, Invalid_Input_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Sqrt kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Square.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Square.cpp
new file mode 100644
index 000000000..bc71905c1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Square.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Square.h"
+#include "kernels/Utils.h"
+
+#include <stdexcept>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Square::Square(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Square::configure()
+{
+ if (input()->element_type() != output()->element_type())
+ {
+ throw std::runtime_error("Input/output tensor data type mismatch.");
+ }
+ output()->resize(input()->shape());
+}
+
+void Square::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Square::evalFloat() const
+{
+ auto in = getTensorData<float>(input());
+ auto out = getTensorData<float>(output());
+ auto size = getTensorShape(input()).FlatSize();
+ for (auto i = in; i != in + size; ++i)
+ {
+ *out = (*i) * (*i);
+ ++out;
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Square.h b/compiler/luci-micro/luci-interpreter/src/kernels/Square.h
new file mode 100644
index 000000000..73ed5a707
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Square.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQUARE_H
+#define LUCI_INTERPRETER_KERNELS_SQUARE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Square : public Kernel
+{
+public:
+ Square(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQUARE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Square.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Square.test.cpp
new file mode 100644
index 000000000..51662dea7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Square.test.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Square.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(SquareTest, Float)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape{3, 1, 2};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Square kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{1.0, 0.0, 1.0, 121.0, 4.0, 2.0736};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.cpp
new file mode 100644
index 000000000..3bafeba4a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SquaredDifference.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+SquaredDifference::SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void SquaredDifference::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type())
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type())
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void SquaredDifference::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalSquaredDifference<float>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> inline void SquaredDifference::evalSquaredDifference() const
+{
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()), [](T x, T y) {
+ const T difference = x - y;
+ return difference * difference;
+ });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.h b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.h
new file mode 100644
index 000000000..9327caf93
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H
+#define LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SquaredDifference : public Kernel
+{
+public:
+ SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> inline void evalSquaredDifference() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp
new file mode 100644
index 000000000..2819c01e2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SquaredDifference.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(SquaredDifferenceTest, Float)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape{3, 1, 2};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{4.0, 0.0, 4.0, 1.0, 1.0, 0.0001};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(SquaredDifferenceTest, FloatBroadcast)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape1{3, 1, 2};
+ Shape input_shape2{1};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ std::vector<float> input_data2{1.0};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::FLOAT32>(input_shape1, input_data1, memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::FLOAT32>(input_shape2, input_data2, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.0, 1.0, 4.0, 100.0, 9.0, 5.9536};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.cpp
new file mode 100644
index 000000000..4a75518c7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Squeeze.h"
+
+#include "kernels/Utils.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Squeeze::Squeeze(const Tensor *input, Tensor *output, const SqueezeParams &params)
+ : KernelWithParams<SqueezeParams>({input}, {output}, params)
+{
+}
+
+void Squeeze::configure()
+{
+ int input_num_dims = input()->shape().num_dims();
+ int num_squeeze_dims = params().squeeze_dims.size();
+ assert(input_num_dims <= 8);
+ bool should_squeeze[8] = {false};
+ int num_squeezed_dims = 0;
+ if (num_squeeze_dims == 0)
+ {
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ if (input()->shape().dim(idx) == 1)
+ {
+ should_squeeze[idx] = true;
+ ++num_squeezed_dims;
+ }
+ }
+ }
+ else
+ {
+ for (int idx = 0; idx < num_squeeze_dims; ++idx)
+ {
+ int current = params().squeeze_dims[idx] < 0 ? params().squeeze_dims[idx] + input_num_dims
+ : params().squeeze_dims[idx];
+ assert(current >= 0 && current < input_num_dims && input()->shape().dim(current) == 1);
+ if (!should_squeeze[current])
+ ++num_squeezed_dims;
+ should_squeeze[current] = true;
+ }
+ }
+ Shape output_shape(input_num_dims - num_squeezed_dims);
+ for (int in_idx = 0, out_idx = 0; in_idx < input_num_dims; ++in_idx)
+ {
+ if (!should_squeeze[in_idx])
+ {
+ output_shape.dim(out_idx++) = input()->shape().dim(in_idx);
+ }
+ }
+ output()->resize(output_shape);
+}
+
+void Squeeze::execute() const
+{
+ assert(input()->shape().num_elements() == output()->shape().num_elements());
+
+ const auto *input_data = input()->data<void>();
+ auto *output_data = output()->data<void>();
+ std::memcpy(output_data, input_data,
+ getDataTypeSize(input()->element_type()) * input()->shape().num_elements());
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.h b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.h
new file mode 100644
index 000000000..687af5158
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQUEEZE_H
+#define LUCI_INTERPRETER_KERNELS_SQUEEZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Squeeze : public KernelWithParams<SqueezeParams>
+{
+public:
+ Squeeze(const Tensor *input, Tensor *output, const SqueezeParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQUEEZE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.test.cpp
new file mode 100644
index 000000000..1bc0b6459
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.test.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Squeeze.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T> input_data, std::initializer_list<T> output_data,
+ std::initializer_list<int32_t> squeeze_dims)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ SqueezeParams params{};
+ params.squeeze_dims = squeeze_dims;
+
+ Squeeze kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <typename T> class SqueezeTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(SqueezeTest, DataTypes);
+
+TYPED_TEST(SqueezeTest, TotalTest)
+{
+ Check<TypeParam>(
+ /*input_shape=*/{1, 24, 1}, /*output_shape=*/{24},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
+ /*output_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
+ {-1, 0});
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.cpp
new file mode 100644
index 000000000..a8730d861
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.cpp
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/StridedSlice.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/strided_slice.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+StridedSlice::StridedSlice(const Tensor *input, const Tensor *begin, const Tensor *end,
+ const Tensor *strides, Tensor *output, const StridedSliceParams &params)
+ : KernelWithParams<StridedSliceParams>({input, begin, end, strides}, {output}, params)
+{
+}
+
+void StridedSlice::configure()
+{
+ assert(begin()->shape().num_dims() == 1);
+ assert(end()->shape().num_dims() == 1);
+ assert(strides()->shape().num_dims() == 1);
+ assert(input()->element_type() == output()->element_type());
+ assert(begin()->element_type() == DataType::S32);
+ assert(end()->element_type() == DataType::S32);
+ assert(strides()->element_type() == DataType::S32);
+ assert(input()->shape().num_dims() <= 4);
+ if (params().ellipsis_mask != 0)
+ {
+ throw std::runtime_error("ellipsis_mask is not implemented yet.");
+ }
+ if (params().new_axis_mask != 0)
+ {
+ throw std::runtime_error("new_axis_mask is not implemented yet.");
+ }
+ if (input()->element_type() == DataType::U8)
+ {
+ assert(input()->scale() == output()->scale());
+ assert(input()->zero_point() == output()->zero_point());
+ }
+ tflite::StridedSliceParams op_params{};
+ op_params.start_indices_count = input()->shape().num_dims();
+ op_params.stop_indices_count = input()->shape().num_dims();
+ op_params.strides_count = input()->shape().num_dims();
+
+ for (int i = 0; i < input()->shape().num_dims(); i++)
+ {
+ op_params.start_indices[i] = getTensorData<int32_t>(begin())[i];
+ op_params.stop_indices[i] = getTensorData<int32_t>(end())[i];
+ op_params.strides[i] = getTensorData<int32_t>(strides())[i];
+ }
+ op_params.begin_mask = params().begin_mask;
+ op_params.ellipsis_mask = 0;
+ op_params.end_mask = params().end_mask;
+ op_params.new_axis_mask = 0;
+ op_params.shrink_axis_mask = params().shrink_axis_mask;
+ std::vector<int32_t> output_shape_vector;
+ for (int i = 0; i < input()->shape().num_dims(); i++)
+ {
+ int idx = input()->shape().num_dims() - i - 1;
+ int32_t stride = getTensorData<int32_t>(strides())[idx];
+ assert(stride != 0);
+ int32_t begin = ::tflite::strided_slice::StartForAxis(op_params, getTensorShape(input()), idx);
+ int32_t end =
+ ::tflite::strided_slice::StopForAxis(op_params, getTensorShape(input()), idx, begin);
+
+ const bool shrink_axis = params().shrink_axis_mask & (1 << idx);
+ if (shrink_axis)
+ {
+ end = begin + 1;
+ }
+
+ int32_t dim_shape = std::ceil((end - begin) / static_cast<float>(stride));
+ dim_shape = dim_shape < 0 ? 0 : dim_shape;
+ if (!shrink_axis)
+ {
+ output_shape_vector.push_back(dim_shape);
+ }
+ }
+ Shape output_shape = Shape(output_shape_vector.size());
+ for (size_t i = 0; i < output_shape_vector.size(); i++)
+ {
+ output_shape.dim(i) = output_shape_vector[output_shape_vector.size() - i - 1];
+ }
+ output()->resize(output_shape);
+}
+
+void StridedSlice::execute() const
+{
+ tflite::StridedSliceParams op_params{};
+ op_params.start_indices_count = input()->shape().num_dims();
+ op_params.stop_indices_count = input()->shape().num_dims();
+ op_params.strides_count = input()->shape().num_dims();
+
+ for (int i = 0; i < input()->shape().num_dims(); i++)
+ {
+ op_params.start_indices[i] = getTensorData<int32_t>(begin())[i];
+ op_params.stop_indices[i] = getTensorData<int32_t>(end())[i];
+ op_params.strides[i] = getTensorData<int32_t>(strides())[i];
+ }
+ op_params.begin_mask = params().begin_mask;
+ op_params.ellipsis_mask = 0;
+ op_params.end_mask = params().end_mask;
+ op_params.new_axis_mask = 0;
+ op_params.shrink_axis_mask = params().shrink_axis_mask;
+
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ case DataType::S32:
+ tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()),
+ getTensorData<int32_t>(input()), getTensorShape(output()),
+ getTensorData<int32_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.h b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.h
new file mode 100644
index 000000000..fc96893a7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_STRIDEDSLICE_H
+#define LUCI_INTERPRETER_KERNELS_STRIDEDSLICE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class StridedSlice : public KernelWithParams<StridedSliceParams>
+{
+public:
+ StridedSlice(const Tensor *input, const Tensor *begin, const Tensor *end, const Tensor *strides,
+ Tensor *output, const StridedSliceParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *begin() const { return _inputs[1]; }
+ const Tensor *end() const { return _inputs[2]; }
+ const Tensor *strides() const { return _inputs[3]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_STRIDEDSLICE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp
new file mode 100644
index 000000000..399cdebed
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/StridedSlice.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(StridedSliceTest, Float)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape{2, 3, 2};
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ Shape begin_shape{3};
+ std::vector<int32_t> begin_data{0, 0, 0};
+ Shape end_shape{3};
+ std::vector<int32_t> end_data{1, 3, 2};
+ Shape strides_shape{3};
+ std::vector<int32_t> strides_data{1, 1, 1};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor begin_tensor =
+ makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get());
+ Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data, memory_manager.get());
+ Tensor strides_tensor =
+ makeInputTensor<DataType::S32>(strides_shape, strides_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ StridedSliceParams params{};
+ params.begin_mask = 0;
+ params.end_mask = 0;
+ params.ellipsis_mask = 0;
+ params.new_axis_mask = 0;
+ params.shrink_axis_mask = 1;
+
+ StridedSlice kernel(&input_tensor, &begin_tensor, &end_tensor, &strides_tensor, &output_tensor,
+ params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<int32_t> output_shape{3, 2};
+ std::vector<float> output_data{1, 2, 3, 4, 5, 6};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(StridedSliceTest, Uint8)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape{2, 3, 2};
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ Shape begin_shape{3};
+ std::vector<int32_t> begin_data{0, 0, 0};
+ Shape end_shape{3};
+ std::vector<int32_t> end_data{1, 3, 2};
+ Shape strides_shape{3};
+ std::vector<int32_t> strides_data{1, 1, 1};
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, 1.0f, 0, input_data, memory_manager.get());
+ Tensor begin_tensor =
+ makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get());
+ Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data, memory_manager.get());
+ Tensor strides_tensor =
+ makeInputTensor<DataType::S32>(strides_shape, strides_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1.0f, 0);
+
+ StridedSliceParams params{};
+ params.begin_mask = 0;
+ params.end_mask = 0;
+ params.ellipsis_mask = 0;
+ params.new_axis_mask = 0;
+ params.shrink_axis_mask = 1;
+
+ StridedSlice kernel(&input_tensor, &begin_tensor, &end_tensor, &strides_tensor, &output_tensor,
+ params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<int32_t> output_shape{3, 2};
+ std::vector<float> output_data{1, 2, 3, 4, 5, 6};
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sub.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.cpp
new file mode 100644
index 000000000..24b6a72e5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.cpp
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sub.h"
+#include "kernels/Utils.h"
+
+#include "PALSub.h"
+
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Sub::Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams &params)
+ : KernelWithParams<SubParams>({input1, input2}, {output}, params)
+{
+}
+
+void Sub::configure()
+{
+ LUCI_INTERPRETER_CHECK(!(input1()->element_type() != input2()->element_type()))
+ LUCI_INTERPRETER_CHECK(!(input1()->element_type() != output()->element_type()))
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Sub::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Sub::evalFloat() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<float>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastSubSlow(
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ }
+ else
+ {
+ luci_interpreter_pal::Sub(params, getTensorShape(input1()), getTensorData<float>(input1()),
+ getTensorShape(input2()), getTensorData<float>(input2()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ }
+}
+
+template <typename T> void Sub::evalInteger() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<T>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastSubSlow(
+ params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+ getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
+void Sub::evalQuantized() const
+{
+ const auto input1_scale = static_cast<double>(input1()->scale());
+ const auto input2_scale = static_cast<double>(input2()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
+ const int left_shift = 20;
+ const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
+ const double real_input1_multiplier = input1_scale / twice_max_input_scale;
+ const double real_input2_multiplier = input2_scale / twice_max_input_scale;
+ const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
+
+ int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
+ int input1_shift{}, input2_shift{}, output_shift{};
+ quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
+ quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
+ quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::ArithmeticParams params{};
+ params.left_shift = left_shift;
+ // The kernel expects inputs' zero points to be negated.
+ params.input1_offset = -input1()->zero_point(); // Note the '-'.
+ params.input1_multiplier = input1_multiplier;
+ params.input1_shift = input1_shift;
+ params.input2_offset = -input2()->zero_point(); // Note the '-'.
+ params.input2_multiplier = input2_multiplier;
+ params.input2_shift = input2_shift;
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = output_multiplier;
+ params.output_shift = output_shift;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastSubSlow(
+ params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+ getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
+ getTensorShape(input2()), getTensorData<uint8_t>(input2()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sub.h b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.h
new file mode 100644
index 000000000..23952b3bd
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SUB_H
+#define LUCI_INTERPRETER_KERNELS_SUB_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Sub : public KernelWithParams<SubParams>
+{
+public:
+ Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams &params);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SUB_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sub.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.test.cpp
new file mode 100644
index 000000000..9abafd49a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.test.cpp
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sub.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+#include <algorithm>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+using std::pair;
+using std::vector;
+using std::transform;
+using std::initializer_list;
+
+class SubTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// for quantized Sub, the error shouldn't exceed step
+float GetTolerance(float min, float max)
+{
+ float kQuantizedStep = (max - min) / 255.0;
+ return kQuantizedStep;
+}
+
+TEST_F(SubTest, Uint8)
+{
+ Shape base_shape = {2, 3, 1, 2};
+ vector<float> base_data = {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ vector<Shape> test_shapes = {{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ vector<float> test_data = {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ vector<vector<int32_t>> output_shapes = {{2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+ vector<vector<float>> output_data = {
+ {-0.5f, 2.0f, 0.1f, 1.8f, -1.3f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, -0.1f, -0.4f,
+ 0.6f, -1.4f, 1.2f, -1.6f, -0.2f, -2.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f,
+ -1.8f, -0.3f, -1.2f, -0.5f, -2.6f, -0.9f, 0.5f, -2.5f, 1.1f, -2.7f, -0.3f, -3.0f},
+ {-0.5f, 2.0f, 1.3f, 0.0f, -0.2f, -2.0f, 1.0f, 2.5f, -1.2f, -0.5f, -0.3f, -3.0f},
+ {-0.5f, 2.1f, -0.6f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f,
+ 0.6f, -1.3f, 0.5f, -1.4f, 1.2f, -0.7f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f,
+ -2.1f, -0.5f, -2.6f, -1.0f, -2.5f, -0.9f, 0.2f, -2.7f, -0.3f, -3.0f, -0.2f, -3.0f},
+ {-0.5f, 2.1f, 0.6f, 0.2f, 1.2f, -0.7f, 0.7f, 2.3f, -2.6f, -1.0f, -0.2f, -3.0f}};
+
+ float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
+ pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f);
+ for (size_t i = 0; i < output_data.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+
+ SubParams params{};
+ params.activation = Activation::NONE;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data[i], kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
+ }
+
+ // Inversion step for output_data, because subtraction is not a commutative operation
+ auto multiply = [](auto &i) {
+ transform(i.begin(), i.end(), i.begin(), [](auto &value) { return value * -1.0f; });
+ };
+ for_each(output_data.begin(), output_data.end(), multiply);
+
+ // Re-run with exchanged inputs.
+ for (size_t i = 0; i < output_data.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+
+ SubParams params{};
+ params.activation = Activation::NONE;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data[i], kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
+ }
+}
+
+TEST_F(SubTest, Float)
+{
+ Shape base_shape = {2, 3, 1, 2};
+ vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ vector<vector<int32_t>> output_shapes{{2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+ vector<vector<float>> test_outputs = {
+ {0.0f, 2.0f, 0.1f, 1.8f, 0.0f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, 0.0f, 0.0f,
+ 0.6f, 0.0f, 1.2f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f,
+ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.5f, 0.0f, 1.1f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 2.0f, 1.3f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 2.1f, 0.0f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f,
+ 0.6f, 0.0f, 0.5f, 0.0f, 1.2f, 0.0f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f,
+ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.2f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 2.1f, 0.6f, 0.2f, 1.2f, 0.0f, 0.7f, 2.3f, 0.0f, 0.0f, 0.0f, 0.0f}};
+
+ vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SubParams params{};
+ params.activation = Activation::RELU;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
+ << "With shape number " << i;
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
+ }
+}
+
+template <loco::DataType DType> void CheckInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::vector<std::vector<dtype>> test_outputs = {
+ {0, 1, 2, 3, 0, 0, 0, 0, 4, 1, 0, 0, 0, 0, 7, 0, 3, 0,
+ 0, 2, 4, 4, 0, 0, 3, 0, 10, 0, 6, 0, 3, 0, 10, 2, 6, 0},
+ {0, 1, 4, 1, 3, 0, 0, 2, 10, 0, 6, 0},
+ {0, 0, 0, 1, 2, 5, 0, 0, 0, 0, 4, 3, 0, 0, 3, 0, 7, 0,
+ 2, 4, 0, 2, 0, 0, 8, 0, 6, 0, 1, 0, 8, 2, 6, 0, 1, 0},
+ {0, 0, 0, 0, 7, 0, 2, 4, 6, 0, 1, 0}};
+ std::vector<dtype> input1_data{-1, 2, 1, 0, 4, -5, 1, 3, 7, -1, 7, 1};
+ std::vector<dtype> input2_data{4, 1, -3, -1, 1, 6};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ SubParams params{};
+ params.activation = Activation::RELU;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+};
+
+TEST_F(SubTest, SInt32)
+{
+ CheckInteger<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(SubTest, SInt64)
+{
+ CheckInteger<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(SubTest, Input_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SubParams params{};
+ params.activation = Activation::RELU;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(SubTest, Invalid_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ SubParams params{};
+ params.activation = Activation::RELU;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(SubTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U64);
+
+ SubParams params{};
+ params.activation = Activation::RELU;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(SubTest, Mismatching_Input_Int_Types_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ SubParams params{};
+ params.activation = Activation::NONE;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.cpp
new file mode 100644
index 000000000..c4fa16912
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Tanh.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/tanh.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Tanh::Tanh(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Tanh::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ if (input()->element_type() == DataType::U8)
+ {
+ populateLookupTable();
+ }
+ output()->resize(input()->shape());
+}
+
+void Tanh::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Tanh::evalFloat() const
+{
+ tflite::reference_ops::Tanh(getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void Tanh::evalQuantized() const
+{
+ const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+ uint8_t *output_data = getTensorData<uint8_t>(output());
+ const uint8_t *input_data = getTensorData<uint8_t>(input());
+ for (int i = 0; i < size; ++i)
+ {
+ output_data[i] = getTableValue(input_data[i]);
+ }
+}
+
+void Tanh::populateLookupTable()
+{
+ const auto input_scale = static_cast<double>(input()->scale());
+ const auto input_zero_point = static_cast<int32_t>(input()->zero_point());
+ const auto output_scale = static_cast<double>(output()->scale());
+ const auto output_zero_point = static_cast<int32_t>(output()->zero_point());
+ const float inverse_scale = 1 / output_scale;
+ int32_t maxval = std::numeric_limits<uint8_t>::max();
+ int32_t minval = std::numeric_limits<uint8_t>::min();
+ for (int32_t val = minval; val <= maxval; ++val)
+ {
+ const float dequantized = input_scale * (val - input_zero_point);
+ const float transformed = std::tanh(dequantized);
+ const float rescaled = std::round(transformed * inverse_scale);
+ const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
+ setTableValue(static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)),
+ static_cast<uint8_t>(val));
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.h b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.h
new file mode 100644
index 000000000..8017c9638
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_TANH_H
+#define LUCI_INTERPRETER_KERNELS_TANH_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Tanh : public Kernel
+{
+public:
+ Tanh(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void populateLookupTable();
+ void setTableValue(uint8_t value, uint8_t idx) { _table[idx] = value; };
+ uint8_t getTableValue(uint8_t idx) const { return _table[idx]; };
+
+private:
+ uint8_t _table[256]{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_TANH_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.test.cpp
new file mode 100644
index 000000000..bfae479a9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.test.cpp
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Tanh.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class TanhTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(TanhTest, Float)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tanh kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 0, -0.9999877, 0.9640275, 0.999329, //
+ 0.99505475, -0.9640275, 1, 0.7615941, //
+ };
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(TanhTest, Uint8)
+{
+ float kMin = -1;
+ float kMax = 127.f / 128.f;
+ float kTanhTolerance = 2 * (1. / 256);
+ std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(8 * kMin, 8 * kMax);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(kMin, kMax);
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({2, 6, 4, 1}, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+
+ Tanh kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ };
+ std::vector<int32_t> ref_output_shape{2, 6, 4, 1};
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data, kTanhTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(TanhTest, InputTypeInvalid_NEG)
+{
+ std::vector<int64_t> input_data{
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::S64>({2, 6, 4, 1}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tanh kernel(&input_tensor, &output_tensor);
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(TanhTest, InputOutputMismatch_NEG)
+{
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 6, 4, 1}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Tanh kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.cpp
new file mode 100644
index 000000000..4d983adda
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace testing
+{
+
+using ::testing::FloatNear;
+using ::testing::Matcher;
+
+Tensor makeOutputTensor(DataType element_type) { return Tensor(element_type, {}, {}, ""); }
+
+Tensor makeOutputTensor(DataType element_type, float scale, int32_t zero_point)
+{
+ return Tensor(element_type, {}, {{scale}, {zero_point}}, "");
+}
+
+std::vector<float> dequantizeTensorData(const Tensor &tensor)
+{
+ if (tensor.element_type() == DataType::U8)
+ {
+ std::vector<uint8_t> data = extractTensorData<uint8_t>(tensor);
+ return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point());
+ }
+ if (tensor.element_type() == DataType::S8)
+ {
+ std::vector<int8_t> data = extractTensorData<int8_t>(tensor);
+ return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point());
+ }
+ else if (tensor.element_type() == DataType::S16)
+ {
+ // S16 quantization is symmetric, so zero point should be zero.
+ for (auto zp : tensor.zero_points())
+ {
+ (void)zp;
+ assert(zp == 0);
+ }
+
+ std::vector<int16_t> data = extractTensorData<int16_t>(tensor);
+ if (tensor.scales().size() == 1)
+ {
+ return dequantize(data.data(), data.size(), tensor.scale(), 0);
+ }
+
+ // quantized_dimension breaks the shape into two parts:
+ // inner dimensions that contain contiguous data with one quantization type
+ // outer dimensions that contain the other dimensions
+ const Shape shape = tensor.shape();
+ const int32_t quantized_dimension = tensor.quantized_dimension();
+ assert(quantized_dimension < shape.num_dims());
+ size_t outer_dims_size = 1;
+ int32_t quant_dim_size = shape.dim(quantized_dimension);
+ size_t inner_dims_size = 1;
+ assert(quant_dim_size == tensor.scales().size());
+
+ for (int i = 0; i < quantized_dimension; ++i)
+ outer_dims_size *= shape.dim(i);
+ for (int i = quantized_dimension + 1; i < shape.num_dims(); ++i)
+ inner_dims_size *= shape.dim(i);
+
+ assert(shape.num_elements() == outer_dims_size * quant_dim_size * inner_dims_size);
+
+ std::vector<float> dequantized_data;
+ dequantized_data.reserve(shape.num_elements());
+ for (size_t outer_it = 0; outer_it < outer_dims_size; ++outer_it)
+ for (int32_t channel = 0; channel < quant_dim_size; ++channel)
+ {
+ float scale = tensor.scales()[channel];
+ size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel);
+ std::vector<float> part_dequantized_data =
+ dequantize(data.data() + offset, inner_dims_size, scale, 0);
+ dequantized_data.insert(dequantized_data.end(), part_dequantized_data.begin(),
+ part_dequantized_data.end());
+ }
+ return dequantized_data;
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+Matcher<std::vector<float>> FloatArrayNear(const std::vector<float> &values, float max_abs_error)
+{
+ std::vector<Matcher<float>> matchers;
+ matchers.reserve(values.size());
+ for (const float v : values)
+ {
+ matchers.emplace_back(FloatNear(v, max_abs_error));
+ }
+ return ElementsAreArray(matchers);
+}
+
+std::vector<int32_t> extractTensorShape(const Tensor &tensor)
+{
+ std::vector<int32_t> result;
+ int dims = tensor.shape().num_dims();
+ for (int i = 0; i < dims; i++)
+ {
+ result.push_back(tensor.shape().dim(i));
+ }
+ return result;
+}
+
+} // namespace testing
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.h b/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.h
new file mode 100644
index 000000000..1f5a0c308
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.h
@@ -0,0 +1,296 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_TESTUTILS_H
+#define LUCI_INTERPRETER_KERNELS_TESTUTILS_H
+
+#include "luci_interpreter/core/Tensor.h"
+#include "luci_interpreter/MemoryManager.h"
+
+#include <type_traits>
+
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace testing
+{
+
+template <typename T>
+std::vector<T> quantize(const float *data, size_t num_elements, float scale, int32_t zero_point);
+
+template <DataType DT>
+Tensor makeInputTensor(const Shape &shape, const std::vector<typename DataTypeImpl<DT>::Type> &data,
+ IMemoryManager *memory_manager)
+{
+ Tensor tensor(DT, shape, {}, "");
+ memory_manager->allocate_memory(tensor);
+ tensor.writeData(data.data(), data.size() * sizeof(typename DataTypeImpl<DT>::Type));
+ return tensor;
+}
+
+/**
+ * @brief Create layer-wise quantized tensor
+ * @tparam DT base integer data type, for example DataType::U8, DataType::S16, DataType::S64
+ * @param shape desired tensor shape
+ * @param scale scale of quantized number
+ * @param zero_point zero point of quantized number, should be 0 for signed datatypes
+ * @param data floating point data for quantization
+ * @param memory_manager memory manager for allocating memory to tensor
+ * @return created tensor
+ */
+template <DataType DT>
+Tensor makeInputTensor(const Shape &shape, float scale, int32_t zero_point,
+ const std::vector<float> &data, IMemoryManager *memory_manager)
+{
+ using NativeT = typename DataTypeImpl<DT>::Type;
+ Tensor tensor(DT, shape, {{scale}, {zero_point}}, "");
+ std::vector<NativeT> quantized_data =
+ quantize<NativeT>(data.data(), data.size(), scale, zero_point);
+ memory_manager->allocate_memory(tensor);
+ tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT));
+ return tensor;
+}
+
+/**
+ * @brief Create channel-wise quantized tensor
+ * @tparam DT base integer data type, for example DataType::U8, DataType::S16, DataType::S64
+ * @param shape desired tensor shape
+ * @param scales scales of quantized number
+ * @param zero_points zero points of quantized number, should be 0 for signed datatypes
+ * @param quantized_dimension dimension to apply quantization along. Usually channels/output channels
+ * @param data floating point data for quantization
+ * @param memory_manager memory manager for allocating memory to tensor
+ * @return created tensor
+ */
+template <DataType DT>
+Tensor makeInputTensor(const Shape &shape, const std::vector<float> &scales,
+ const std::vector<int32_t> &zero_points, int quantized_dimension,
+ const std::vector<float> &data, IMemoryManager *memory_manager)
+{
+ using NativeT = typename DataTypeImpl<DT>::Type;
+ assert(quantized_dimension < shape.num_dims());
+ Tensor tensor(DT, shape, {scales, zero_points, quantized_dimension}, "");
+
+  // quantized_dimension splits the shape into two parts:
+  // inner dimensions, which hold contiguous data sharing a single quantization parameter set
+  // outer dimensions, which cover all remaining dimensions
+ size_t outer_dims_size = 1;
+ int32_t quant_dim_size = shape.dim(quantized_dimension);
+ size_t inner_dims_size = 1;
+ assert(quant_dim_size == scales.size());
+ assert(quant_dim_size == zero_points.size());
+
+ for (int i = 0; i < quantized_dimension; ++i)
+ outer_dims_size *= shape.dim(i);
+ for (int i = quantized_dimension + 1; i < shape.num_dims(); ++i)
+ inner_dims_size *= shape.dim(i);
+
+ assert(shape.num_elements() == outer_dims_size * quant_dim_size * inner_dims_size);
+
+ std::vector<NativeT> quantized_data;
+ quantized_data.reserve(shape.num_elements());
+ for (size_t outer_it = 0; outer_it < outer_dims_size; ++outer_it)
+ for (int32_t channel = 0; channel < quant_dim_size; ++channel)
+ {
+ int32_t zero_point = zero_points[channel];
+ float scale = scales[channel];
+ size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel);
+ std::vector<NativeT> part_quantized_data =
+ quantize<NativeT>(data.data() + offset, inner_dims_size, scale, zero_point);
+ quantized_data.insert(quantized_data.end(), part_quantized_data.begin(),
+ part_quantized_data.end());
+ }
+ assert(quantized_data.size() == shape.num_elements());
+ memory_manager->allocate_memory(tensor);
+ tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT));
+ return tensor;
+}
+
+Tensor makeOutputTensor(DataType element_type);
+Tensor makeOutputTensor(DataType element_type, float scale, int32_t zero_point);
+
+std::vector<int32_t> extractTensorShape(const Tensor &tensor);
+
+// Returns the corresponding DataType given the type T.
+template <typename T> constexpr DataType getElementType()
+{
+ if (std::is_same<T, float>::value)
+ return DataType::FLOAT32;
+ if (std::is_same<T, double>::value)
+ return DataType::FLOAT64;
+ if (std::is_same<T, uint8_t>::value)
+ return DataType::U8;
+ if (std::is_same<T, uint16_t>::value)
+ return DataType::U16;
+ if (std::is_same<T, uint32_t>::value)
+ return DataType::U32;
+ if (std::is_same<T, uint64_t>::value)
+ return DataType::U64;
+ if (std::is_same<T, int8_t>::value)
+ return DataType::S8;
+ if (std::is_same<T, int16_t>::value)
+ return DataType::S16;
+ if (std::is_same<T, int32_t>::value)
+ return DataType::S32;
+ if (std::is_same<T, int64_t>::value)
+ return DataType::S64;
+ if (std::is_same<T, bool>::value)
+ return DataType::BOOL;
+ return DataType::Unknown;
+}
+
+template <typename T> std::vector<T> extractTensorData(const Tensor &tensor)
+{
+ const auto *data_ptr = tensor.data<T>();
+ return std::vector<T>(data_ptr, data_ptr + tensor.shape().num_elements());
+}
+
+std::vector<float> dequantizeTensorData(const Tensor &tensor);
+
+// Array version of `::testing::FloatNear` matcher.
+::testing::Matcher<std::vector<float>> FloatArrayNear(const std::vector<float> &values,
+ float max_abs_error = 1.0e-5f);
+
+template <typename T>
+std::vector<T> quantize(const float *data, size_t num_elements, float scale, int32_t zero_point)
+{
+ static_assert(std::is_integral<T>::value, "Integral type expected.");
+
+ float q_min{}, q_max{};
+ if (std::is_signed<T>::value)
+ {
+ q_min = -std::numeric_limits<T>::max();
+ q_max = std::numeric_limits<T>::max();
+ }
+ else
+ {
+ q_min = 0;
+ q_max = std::numeric_limits<T>::max();
+ }
+
+ std::vector<T> q;
+ for (size_t i = 0; i < num_elements; ++i)
+ {
+ const auto &f = data[i];
+ q.push_back(static_cast<T>(
+ std::max<float>(q_min, std::min<float>(q_max, std::round(zero_point + (f / scale))))));
+ }
+ return q;
+}
+
+template <typename T>
+std::vector<float> dequantize(const T *data, size_t num_elements, float scale, int32_t zero_point)
+{
+ static_assert(std::is_integral<T>::value, "Integral type expected.");
+ std::vector<float> f;
+ for (size_t i = 0; i < num_elements; ++i)
+ {
+ const T &q = data[i];
+ f.push_back(scale * (q - zero_point));
+ }
+ return f;
+}
+
+// NOTE Returns scale and zero point for _asymmetric_ range (both signed and unsigned).
+template <typename T> std::pair<float, int32_t> quantizationParams(float f_min, float f_max)
+{
+ static_assert(std::is_integral<T>::value, "Integral type expected.");
+ int32_t zero_point = 0;
+ float scale = 0;
+ const T qmin = std::numeric_limits<T>::lowest();
+ const T qmax = std::numeric_limits<T>::max();
+ const float qmin_double = qmin;
+ const float qmax_double = qmax;
+ // 0 should always be a representable value. Let's assume that the initial
+ // min,max range contains 0.
+ assert(f_max >= 0);
+ assert(f_min <= 0);
+ if (f_min == f_max)
+ {
+ // Special case where the min,max range is a point. Should be {0}.
+ assert(f_max == 0);
+ assert(f_min == 0);
+ return {scale, zero_point};
+ }
+
+ // General case.
+ //
+ // First determine the scale.
+ scale = (f_max - f_min) / (qmax_double - qmin_double);
+
+ // Zero-point computation.
+ // First the initial floating-point computation. The zero-point can be
+ // determined from solving an affine equation for any known pair
+ // (real value, corresponding quantized value).
+ // We know two such pairs: (rmin, qmin) and (rmax, qmax).
+ // The arithmetic error on the zero point computed from either pair
+ // will be roughly machine_epsilon * (sum of absolute values of terms)
+ // so we want to use the variant that adds the smaller terms.
+ const float zero_point_from_min = qmin_double - f_min / scale;
+ const float zero_point_from_max = qmax_double - f_max / scale;
+
+ const float zero_point_from_min_error = std::abs(qmin_double) + std::abs(f_min / scale);
+
+ const float zero_point_from_max_error = std::abs(qmax_double) + std::abs(f_max / scale);
+
+ const float zero_point_double = zero_point_from_min_error < zero_point_from_max_error
+ ? zero_point_from_min
+ : zero_point_from_max;
+
+ // Now we need to nudge the zero point to be an integer
+ // (our zero points are integer, and this is motivated by the requirement
+ // to be able to represent the real value "0" exactly as a quantized value,
+ // which is required in multiple places, for example in Im2col with SAME
+ // padding).
+
+ T nudged_zero_point = 0;
+ if (zero_point_double < qmin_double)
+ {
+ nudged_zero_point = qmin;
+ }
+ else if (zero_point_double > qmax_double)
+ {
+ nudged_zero_point = qmax;
+ }
+ else
+ {
+ nudged_zero_point = static_cast<T>(std::round(zero_point_double));
+ }
+
+  // The zero point should always be in the range of quantized values,
+  // i.e. [qmin, qmax].
+ assert(qmax >= nudged_zero_point);
+ assert(qmin <= nudged_zero_point);
+ zero_point = nudged_zero_point;
+ // finally, return the values
+ return {scale, zero_point};
+}
+
+inline float getTolerance(float min, float max, int quantize_steps)
+{
+ return ((max - min) / quantize_steps);
+}
+
+} // namespace testing
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_TESTUTILS_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.cpp
new file mode 100644
index 000000000..802d87295
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Transpose.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/transpose.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Transpose::Transpose(const Tensor *input, const Tensor *perm, Tensor *output)
+ : Kernel({input, perm}, {output})
+{
+}
+
+void Transpose::configure()
+{
+ // Transpose op only supports 1D-4D input arrays.
+ int dims = input()->shape().num_dims();
+ const int32_t *perm_data = getTensorData<int32_t>(perm());
+
+ assert(input()->shape().num_dims() <= 4);
+ assert(input()->element_type() == output()->element_type());
+
+ assert(perm()->shape().num_dims() == 1);
+ assert(perm()->shape().dim(0) == dims);
+
+ Shape output_shape(dims);
+ for (int i = 0; i < dims; i++)
+ {
+ assert(perm_data[i] < dims && perm_data[i] >= 0);
+ output_shape.dim(i) = input()->shape().dim(perm_data[i]);
+ }
+
+ output()->resize(output_shape);
+}
+
+void Transpose::execute() const
+{
+ tflite::TransposeParams params{};
+ const int32_t *perm_data = getTensorData<int32_t>(perm());
+ const int32_t size = perm()->shape().dim(0);
+ params.perm_count = size;
+ for (int i = 0; i < size; i++)
+ params.perm[i] = perm_data[i];
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::reference_ops::Transpose(params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ tflite::reference_ops::Transpose(params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.h b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.h
new file mode 100644
index 000000000..d6f89c352
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_TRANSPOSE_H
+#define LUCI_INTERPRETER_KERNELS_TRANSPOSE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Transpose : public Kernel
+{
+public:
+ Transpose(const Tensor *input, const Tensor *perm, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *perm() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_TRANSPOSE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.test.cpp
new file mode 100644
index 000000000..43be8f8b9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.test.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Transpose.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> perm_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data,
+ std::initializer_list<int32_t> perm_data, std::initializer_list<T> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor perm_tensor = makeInputTensor<DataType::S32>(perm_shape, perm_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ Transpose kernel(&input_tensor, &perm_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+}
+
+template <typename T> class TransposeTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(TransposeTest, DataTypes);
+
+TYPED_TEST(TransposeTest, Small3D)
+{
+ Check<TypeParam>(/*input_shape=*/{2, 3, 4}, /*perm_shape=*/{3}, /*output_shape=*/{4, 2, 3},
+ /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
+ /*perm_data=*/{2, 0, 1},
+ /*output_data=*/{0, 4, 8, 12, 16, 20, 1, 5, 9, 13, 17, 21,
+ 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23});
+}
+
+TYPED_TEST(TransposeTest, Large4D)
+{
+ Check<TypeParam>(
+ /*input_shape=*/{2, 3, 4, 5}, /*perm_shape=*/{4}, /*output_shape=*/{4, 2, 3, 5},
+ /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119},
+ /*perm_data=*/{2, 0, 1, 3},
+ /*output_data=*/{0, 1, 2, 3, 4, 20, 21, 22, 23, 24, 40, 41, 42, 43, 44,
+ 60, 61, 62, 63, 64, 80, 81, 82, 83, 84, 100, 101, 102, 103, 104,
+ 5, 6, 7, 8, 9, 25, 26, 27, 28, 29, 45, 46, 47, 48, 49,
+ 65, 66, 67, 68, 69, 85, 86, 87, 88, 89, 105, 106, 107, 108, 109,
+ 10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50, 51, 52, 53, 54,
+ 70, 71, 72, 73, 74, 90, 91, 92, 93, 94, 110, 111, 112, 113, 114,
+ 15, 16, 17, 18, 19, 35, 36, 37, 38, 39, 55, 56, 57, 58, 59,
+ 75, 76, 77, 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119});
+}
+
+TYPED_TEST(TransposeTest, Large2D)
+{
+ Check<TypeParam>(
+ /*input_shape=*/{10, 12}, /*perm_shape=*/{2}, /*output_shape=*/{12, 10},
+ /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119},
+ /*perm_data=*/{1, 0},
+ /*output_data=*/{0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 1, 13, 25, 37, 49,
+ 61, 73, 85, 97, 109, 2, 14, 26, 38, 50, 62, 74, 86, 98, 110,
+ 3, 15, 27, 39, 51, 63, 75, 87, 99, 111, 4, 16, 28, 40, 52,
+ 64, 76, 88, 100, 112, 5, 17, 29, 41, 53, 65, 77, 89, 101, 113,
+ 6, 18, 30, 42, 54, 66, 78, 90, 102, 114, 7, 19, 31, 43, 55,
+ 67, 79, 91, 103, 115, 8, 20, 32, 44, 56, 68, 80, 92, 104, 116,
+ 9, 21, 33, 45, 57, 69, 81, 93, 105, 117, 10, 22, 34, 46, 58,
+ 70, 82, 94, 106, 118, 11, 23, 35, 47, 59, 71, 83, 95, 107, 119});
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.cpp
new file mode 100644
index 000000000..1b5f9d941
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.cpp
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TransposeConv.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/transpose_conv.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
+ const Tensor *bias, Tensor *output, Tensor *scratch_tensor,
+ const TransposeConvParams &params)
+ : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias},
+ {output, scratch_tensor}, params)
+{
+}
+
+TransposeConv::~TransposeConv()
+{
+  // Define destructor here, to delete vector of quantized multipliers properly
+}
+
+void TransposeConv::configure()
+{
+ assert(output_shape()->shape().num_dims() == 1);
+ assert(input()->shape().num_dims() == 4);
+ assert(filter()->shape().num_dims() == 4);
+ assert(input()->element_type() == DataType::FLOAT32 || input()->element_type() == DataType::U8 ||
+ input()->element_type() == DataType::S16);
+ assert(input()->element_type() == output()->element_type());
+ assert(input()->shape().dim(3) == filter()->shape().dim(3));
+
+ const int num_dims = output_shape()->shape().dim(0);
+ Shape out_shape(num_dims);
+ const auto *shape_data = getTensorData<int32_t>(output_shape());
+ for (int i = 0; i < num_dims; i++)
+ out_shape.dim(i) = shape_data[i];
+ output()->resize(out_shape);
+
+ const int32_t filter_height = filter()->shape().dim(1);
+ const int32_t filter_width = filter()->shape().dim(2);
+ const int32_t output_height = out_shape.dim(1);
+ const int32_t output_width = out_shape.dim(2);
+
+ const int32_t unused_output_height =
+ computeOutputSize(params().padding, output_height, filter_height, params().stride_height, 1);
+ const int32_t unused_output_width =
+ computeOutputSize(params().padding, output_width, filter_width, params().stride_width, 1);
+
+ _padding_height =
+ computePadding(params().stride_height, 1, output_height, filter_height, unused_output_height);
+ _padding_width =
+ computePadding(params().stride_width, 1, output_width, filter_width, unused_output_width);
+
+ if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
+ {
+ auto scratch_tensor = getOutputTensors()[1];
+ scratch_tensor->resize(output()->shape());
+ const std::vector<double> real_multipliers =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ _quant_multipliers = quantizeMultipliers(real_multipliers);
+ }
+ else
+ {
+ auto scratch_tensor = getOutputTensors()[1];
+ scratch_tensor->set_allocatable(false);
+ }
+}
+
+void TransposeConv::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ if (filter()->scales().size() == 1)
+ {
+ evalQuantized();
+ }
+ else if (filter()->scales().size() > 1)
+ {
+ LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+ static_cast<size_t>(filter()->shape().dim(0)));
+ evalQuantizedPerChannel();
+ }
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void TransposeConv::evalFloat() const
+{
+ tflite::ConvParams op_params{};
+ op_params.padding_type = tflite::PaddingType::kSame;
+ op_params.padding_values.height = _padding_height;
+ op_params.padding_values.width = _padding_width;
+ op_params.stride_height = params().stride_height;
+ op_params.stride_width = params().stride_width;
+ tflite::reference_ops::TransposeConv(op_params, //
+ getTensorShape(input()), getTensorData<float>(input()), //
+ getTensorShape(filter()), getTensorData<float>(filter()), //
+ getTensorShape(bias()), getTensorData<float>(bias()), //
+ getTensorShape(output()), getTensorData<float>(output()), //
+ tflite::RuntimeShape(), nullptr);
+}
+
+void TransposeConv::evalQuantized() const
+{
+ tflite::ConvParams op_params{};
+ op_params.padding_type = tflite::PaddingType::kSame;
+ op_params.padding_values.height = _padding_height;
+ op_params.padding_values.width = _padding_width;
+ op_params.stride_height = params().stride_height;
+ op_params.stride_width = params().stride_width;
+ // The kernel expects input and filter zero points to be negated.
+ op_params.input_offset = -input()->zero_point(); // Note the '-'.
+ op_params.weights_offset = -filter()->zero_point(); // Note the '-'.
+ op_params.output_offset = output()->zero_point();
+ op_params.output_multiplier = _quant_multipliers[0].multiplier;
+ op_params.output_shift = _quant_multipliers[0].shift;
+ op_params.quantized_activation_min = std::numeric_limits<uint8_t>::min();
+ op_params.quantized_activation_max = std::numeric_limits<uint8_t>::max();
+
+ auto scratch_tensor = getOutputTensors()[1];
+
+ tflite::reference_ops::TransposeConv(op_params, //
+ getTensorShape(input()), getTensorData<uint8>(input()), //
+ getTensorShape(filter()), getTensorData<uint8>(filter()), //
+ getTensorShape(bias()), getTensorData<int32_t>(bias()), //
+ getTensorShape(output()), getTensorData<uint8>(output()), //
+ tflite::RuntimeShape(), nullptr, //
+ getTensorData<int32_t>(scratch_tensor));
+}
+
+void TransposeConv::evalQuantizedPerChannel() const
+{
+ const auto *input_data = getTensorData<uint8_t>(input());
+ const auto *filter_data = getTensorData<uint8_t>(filter());
+ const auto *bias_data = getTensorData<int32_t>(bias());
+ auto *output_data = getTensorData<uint8_t>(output());
+
+ auto scratch_tensor = getOutputTensors()[1];
+ auto *scratch_data = getTensorData<int32_t>(scratch_tensor);
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t output_depth = filter_shape.dim(0);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
+
+ std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int32_t));
+
+ BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t in_y = 0; in_y < input_height; ++in_y)
+ {
+ for (int32_t in_x = 0; in_x < input_width; ++in_x)
+ {
+ for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+ {
+ const int32_t out_y_origin = in_y * stride_height - _padding_height;
+ const int32_t out_x_origin = in_x * stride_width - _padding_width;
+ for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int32_t out_x = out_x_origin + filter_x;
+ const int32_t out_y = out_y_origin + filter_y;
+ if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ const uint8_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ const uint8_t filter_val =
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
+ static_cast<int32_t>(input_val - input()->zero_point()) *
+ static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ for (int32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ int32_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
+ if (bias_data)
+ {
+ acc += bias_data[out_c];
+ }
+
+ int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+ acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
+
+ scaled_acc += output()->zero_point();
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+
+ output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+ }
+ }
+ }
+ }
+}
+
+void TransposeConv::evalQuantizedS16() const
+{
+ const auto *input_data = getTensorData<int16_t>(input());
+ const auto *filter_data = getTensorData<int16_t>(filter());
+ const auto *bias_data = getTensorData<int64_t>(bias());
+ auto *output_data = getTensorData<int16_t>(output());
+
+ auto scratch_tensor = getOutputTensors()[1];
+ auto *scratch_data = getTensorData<int64_t>(scratch_tensor);
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t output_depth = filter_shape.dim(0);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
+
+ std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int64_t));
+
+ BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t in_y = 0; in_y < input_height; ++in_y)
+ {
+ for (int32_t in_x = 0; in_x < input_width; ++in_x)
+ {
+ for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+ {
+ const int32_t out_y_origin = in_y * stride_height - _padding_height;
+ const int32_t out_x_origin = in_x * stride_width - _padding_width;
+ for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int32_t out_x = out_x_origin + filter_x;
+ const int32_t out_y = out_y_origin + filter_y;
+ if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ const int16_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ const int16_t filter_val =
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
+ static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ for (int32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ int64_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
+ if (bias_data)
+ {
+ acc += bias_data[out_c];
+ }
+ int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+ acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
+
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+
+ output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+ }
+ }
+ }
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.h b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.h
new file mode 100644
index 000000000..cea0cf3c7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_TRANSPOSECONV_H
+#define LUCI_INTERPRETER_KERNELS_TRANSPOSECONV_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ChannelQuantMultipliers;
+
+class TransposeConv : public KernelWithParams<TransposeConvParams>
+{
+public:
+ TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
+ const Tensor *bias, Tensor *output, Tensor *scratch_tensor,
+ const TransposeConvParams &params);
+
+ ~TransposeConv();
+
+ const Tensor *output_shape() const { return _inputs[0]; }
+ const Tensor *filter() const { return _inputs[1]; }
+ const Tensor *input() const { return _inputs[2]; }
+ const Tensor *bias() const { return _inputs[3]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void evalQuantizedPerChannel() const;
+ void evalQuantizedS16() const;
+
+private:
+ int32_t _padding_height{};
+ int32_t _padding_width{};
+ // The scaling factor from input to output (aka the 'real multiplier') can
+ // be represented as a fixed point multiplier plus a left shift.
+ std::vector<ChannelQuantMultipliers> _quant_multipliers;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_TRANSPOSECONV_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp
new file mode 100644
index 000000000..4856e1b87
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp
@@ -0,0 +1,353 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TransposeConv.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T, typename B>
+void Check(std::initializer_list<int32_t> output_shape_shape,
+ std::initializer_list<int32_t> weight_shape, std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<int32_t> output_shape_data, std::initializer_list<T> weight_data,
+ std::initializer_list<T> input_data, std::initializer_list<B> bias_data,
+ std::initializer_list<T> output_data, luci::Padding padding, int32_t stride_height,
+ int32_t stride_width)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr DataType element_type = getElementType<T>();
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>(output_shape_shape, output_shape_data, memory_manager.get());
+ Tensor weight_tensor =
+ makeInputTensor<element_type>(weight_shape, weight_data, memory_manager.get());
+ Tensor input_data_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+
+ DataType scratch_data_type = element_type == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ TransposeConvParams params{};
+ params.padding = padding;
+ params.stride_height = stride_height;
+ params.stride_width = stride_width;
+
+ if (bias_data.size() != 0)
+ {
+ Tensor bias_tensor =
+ makeInputTensor<getElementType<B>()>(bias_shape, bias_data, memory_manager.get());
+ TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+ }
+ else
+ {
+ TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, nullptr,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+ }
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+}
+
+TEST(TransposeConvTest, FloatSimple)
+{
+ Check<float, float>(
+ /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 1}, /*input_shape=*/{1, 4, 4, 1},
+ /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1},
+ /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*bias_data=*/{},
+ /*output_data=*/{29, 62, 83, 75, 99, 192, 237, 198, 207, 372, 417, 330, 263, 446, 485, 365},
+ /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1);
+
+ SUCCEED();
+}
+
+TEST(TransposeConvTest, FloatTwoFiltersTest)
+{
+ Check<float, float>(
+ /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 2}, /*input_shape=*/{1, 4, 4, 2},
+ /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1},
+ /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
+ /*bias_data=*/{},
+ /*output_data=*/
+ {184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968, 3352, 3652, 2760},
+ /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1);
+
+ SUCCEED();
+}
+
+TEST(TransposeConvTest, SimpleBiasTest)
+{
+ Check<float, float>(
+ /*output_shape_shape=*/{4}, /*weight_shape=*/{2, 3, 3, 1},
+ /*input_shape=*/{1, 2, 2, 1},
+ /*bias_shape=*/{2}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 5, 5, 2},
+ /*weight_data=*/{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18},
+ /*input_data=*/{1, 2, 3, 4},
+ /*bias_data=*/{3, 4},
+ /*output_data=*/{4, 6, 6, 8, 10, 14, 9, 12, 13, 16, 10, 12, 12, 14, 28, 32, 21,
+ 24, 25, 28, 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, 24, 28, 30, 34,
+ 64, 72, 39, 44, 47, 52, 42, 46, 48, 52, 106, 114, 63, 68, 71, 76},
+ /*params.padding=*/luci::Padding::VALID, /*stride_height=*/2, /*stride_width=*/2);
+
+ SUCCEED();
+}
+
+TEST(TransposeConvTest, UInt8)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::vector<float> input_data{1, 2, 3, 4};
+ std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
+ std::vector<float> bias_data{3, 4};
+ std::vector<int32_t> output_shape_data{1, 5, 5, 2};
+ std::vector<float> ref_output_data{
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ };
+
+ // Choose quantization parameters carefully.
+ auto input_quant = quantizationParams<uint8_t>(-8.0, 7.9375); // s = 1 / 16, zp = 128
+ auto filter_quant = quantizationParams<uint8_t>(-24.0, 39.75); // s = 1 / 4, zp = 96
+ auto output_quant = quantizationParams<uint8_t>(-64.0, 191.0); // s = 1, zp = 64
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 2, 1}, input_quant.first, input_quant.second, input_data, memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::U8>(
+ {2, 3, 3, 1}, filter_quant.first, filter_quant.second, filter_data, memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>({2}, input_quant.first * filter_quant.first,
+ 0, bias_data, memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second);
+
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
+ TransposeConvParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(TransposeConvTest, UInt8_CWQ)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ const int32_t output_channels = 2;
+ std::vector<float> input_data{1, 2, 3, 4};
+ std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
+ std::vector<float> bias_data{3, 4};
+ std::vector<int32_t> output_shape_data{1, 5, 5, 2};
+ std::vector<float> ref_output_data{
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ };
+
+ // Choose quantization parameters carefully.
+ auto input_quant = quantizationParams<uint8_t>(-8.0, 7.9375); // s = 1 / 16, zp = 128
+ auto output_quant = quantizationParams<uint8_t>(-64.0, 191.0); // s = 1, zp = 64
+
+ std::vector<std::pair<float, int32_t>> filter_quant_params;
+ filter_quant_params.push_back(quantizationParams<uint8_t>(0, 17));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(0, 18));
+
+ std::vector<float> filter_scales;
+ std::vector<int32_t> filter_zerops;
+ for (auto iter : filter_quant_params)
+ {
+ filter_scales.push_back(iter.first);
+ filter_zerops.push_back(iter.second);
+ }
+
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_quant_params[i].first * input_quant.first);
+ std::vector<int32_t> zerop(output_channels, 0);
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 2, 1}, input_quant.first, input_quant.second, input_data, memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::U8>(
+ {output_channels, 3, 3, 1}, filter_scales, filter_zerops, 0, filter_data, memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
+ bias_data, memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second);
+
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
+ TransposeConvParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(TransposeConvTest, SInt16)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::vector<float> input_data{1, 2, 3, 4};
+ std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
+ std::vector<float> bias_data{3, 4};
+ std::vector<int32_t> output_shape_data{1, 5, 5, 2};
+ std::vector<float> ref_output_data{
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 2, 2, 1}, 0.25, 0, input_data, memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::S16>({2, 3, 3, 1}, 0.2, 0, filter_data, memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::S64>({2}, 0.25 * 0.2, 0, bias_data, memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
+ TransposeConvParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(TransposeConvTest, SInt16_CWQ_weights)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ const int output_channels = 2;
+ const Shape input_shape{1, 2, 2, 1};
+ const Shape filter_shape{output_channels, 3, 3, 1};
+ const Shape bias_shape{output_channels};
+ std::vector<int32_t> output_shape_data{1, 5, 5, output_channels};
+
+ std::vector<float> input_data{1, 2, 3, 4};
+ std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
+ std::vector<float> bias_data{3, 4};
+
+ std::vector<float> ref_output_data{
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ };
+
+ const float input_scale = 0.25;
+ const float output_scale = 0.5;
+ const std::vector<float> filter_scales{0.2f, 0.5f};
+ std::vector<float> bias_scales{filter_scales[0] * input_scale, filter_scales[1] * input_scale};
+ const std::vector<int32_t> zerop(2, 0);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0,
+ filter_data, memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
+ memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0);
+
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
+ TransposeConvParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.cpp
new file mode 100644
index 000000000..9127241c0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Unpack.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Unpack::Unpack(const Tensor *input, std::vector<Tensor *> outputs, const UnpackParams &params)
+ : KernelWithParams<UnpackParams>({input}, std::move(outputs), params)
+{
+}
+
+void Unpack::configure()
+{
+ const Shape &input_shape = input()->shape();
+
+ int axis = _params.axis;
+ if (axis < 0)
+ axis += input()->shape().num_dims();
+ assert(axis >= 0 && axis < input_shape.num_dims());
+
+ Shape output_shape(input_shape.num_dims() - 1);
+ int out_index = 0;
+ for (int in_index = 0; in_index < input_shape.num_dims(); ++in_index)
+ {
+ if (in_index != axis)
+ output_shape.dim(out_index++) = input_shape.dim(in_index);
+ }
+
+ for (Tensor *output : _outputs)
+ {
+ assert(output->element_type() == input()->element_type());
+ output->resize(output_shape);
+ }
+}
+
+template <typename T> void Unpack::executeImpl() const
+{
+ tflite::UnpackParams params{};
+ params.axis = _params.axis;
+ params.num_split = _outputs.size();
+ VectorOfTensors<T, false> all_outputs(_outputs);
+ tflite::reference_ops::Unpack<T>(params, getTensorShape(input()), getTensorData<T>(input()),
+ **all_outputs.shapes(), all_outputs.data());
+}
+
+void Unpack::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ return executeImpl<float>();
+ case DataType::U8:
+ return executeImpl<uint8_t>();
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.h b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.h
new file mode 100644
index 000000000..f4a44ecad
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_UNPACK_H
+#define LUCI_INTERPRETER_KERNELS_UNPACK_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Unpack : public KernelWithParams<UnpackParams>
+{
+public:
+ Unpack(const Tensor *input, std::vector<Tensor *> outputs, const UnpackParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output(int index) const { return _outputs[index]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void executeImpl() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_UNPACK_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.test.cpp
new file mode 100644
index 000000000..9384ddc83
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.test.cpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Unpack.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(int axis, Shape input_shape, std::initializer_list<T> input_data,
+ const std::vector<std::initializer_list<int32_t>> &exp_output_shape,
+ std::vector<std::initializer_list<T>> exp_output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ const int num_outputs = input_shape.dim(axis < 0 ? axis + input_shape.num_dims() : axis);
+
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ std::vector<Tensor> output_tensors;
+ output_tensors.reserve(num_outputs);
+ for (int i = 0; i < num_outputs; ++i)
+ {
+ output_tensors.push_back(makeOutputTensor(element_type));
+ }
+
+ std::vector<Tensor *> output_tensor_ptrs(num_outputs);
+ for (int i = 0; i < num_outputs; ++i)
+ {
+ output_tensor_ptrs[i] = &output_tensors[i];
+ }
+
+ UnpackParams params{};
+ params.axis = axis;
+
+ Unpack kernel(&input_tensor, std::move(output_tensor_ptrs), params);
+ kernel.configure();
+ for (int i = 0; i < num_outputs; i++)
+ {
+ memory_manager->allocate_memory(output_tensors[i]);
+ }
+ kernel.execute();
+
+ for (int i = 0; i < num_outputs; ++i)
+ {
+ EXPECT_THAT(extractTensorData<T>(output_tensors[i]),
+ ::testing::ElementsAreArray(exp_output_data[i]));
+ }
+}
+
+template <typename T> class UnpackTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(UnpackTest, DataTypes);
+
+TYPED_TEST(UnpackTest, ThreeOutputs)
+{
+ Check<TypeParam>(/*axis=*/0, /*input_shape=*/{3, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{2}, {2}, {2}},
+ /*exp_output_data=*/{{1, 2}, {3, 4}, {5, 6}});
+}
+
+TYPED_TEST(UnpackTest, ThreeOutputsAxisOne)
+{
+ Check<TypeParam>(/*axis=*/1, /*input_shape=*/{3, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{3}, {3}},
+ /*exp_output_data=*/{{1, 3, 5}, {2, 4, 6}});
+}
+
+TYPED_TEST(UnpackTest, ThreeOutputsNegativeAxisOne)
+{
+ Check<TypeParam>(/*axis=*/-1, /*input_shape=*/{3, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{3}, {3}},
+ /*exp_output_data=*/{{1, 3, 5}, {2, 4, 6}});
+}
+
+TYPED_TEST(UnpackTest, ThreeOutputsNegativeAxisTwo)
+{
+ Check<TypeParam>(/*axis=*/-2, /*input_shape=*/{3, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{2}, {2}, {2}},
+ /*exp_output_data=*/{{1, 2}, {3, 4}, {5, 6}});
+}
+
+TYPED_TEST(UnpackTest, OneOutput)
+{
+ Check<TypeParam>(/*axis=*/0, /*input_shape=*/{1, 6},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{6}},
+ /*exp_output_data=*/{{1, 2, 3, 4, 5, 6}});
+}
+
+TYPED_TEST(UnpackTest, ThreeDimensionsTwoOutputs)
+{
+ Check<TypeParam>(/*axis=*/2, /*input_shape=*/{2, 2, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8},
+ /*exp_output_shape=*/{{2, 2}, {2, 2}},
+ /*exp_output_data=*/{{1, 3, 5, 7}, {2, 4, 6, 8}});
+}
+
+TYPED_TEST(UnpackTest, FiveDimensionsTwoOutputs)
+{
+ Check<TypeParam>(
+ /*axis=*/2, /*input_shape=*/{2, 2, 2, 2, 1},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*exp_output_shape=*/{{2, 2, 2, 1}, {2, 2, 2, 1}},
+ /*exp_output_data=*/
+ {{1, 2, 5, 6, 9, 10, 13, 14}, {3, 4, 7, 8, 11, 12, 15, 16}});
+}
+
+TYPED_TEST(UnpackTest, VectorToScalar)
+{
+ Check<TypeParam>(/*axis=*/0, /*input_shape=*/{5},
+ /*input_data=*/{1, 2, 3, 4, 5},
+ /*exp_output_shape=*/{{}, {}, {}, {}, {}},
+ /*exp_output_data=*/{{1}, {2}, {3}, {4}, {5}});
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Utils.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Utils.cpp
new file mode 100644
index 000000000..5d8e5db83
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Utils.cpp
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Utils.h"
+
+#include <cassert>
+#include <cmath>
+#include <limits>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+template <typename T>
+void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
+{
+ switch (activation)
+ {
+ case Activation::NONE:
+ *activation_min = std::numeric_limits<T>::lowest();
+ *activation_max = std::numeric_limits<T>::max();
+ break;
+ case Activation::RELU:
+ *activation_min = 0;
+ *activation_max = std::numeric_limits<T>::max();
+ break;
+ case Activation::RELU_N1_TO_1:
+ *activation_min = -1;
+ *activation_max = 1;
+ break;
+ case Activation::RELU6:
+ *activation_min = 0;
+ *activation_max = 6;
+ break;
+ default:
+ throw std::runtime_error("Unsupported activation.");
+ }
+}
+
+template void calculateActivationRange(Activation activation, float *activation_min,
+ float *activation_max);
+template void calculateActivationRange(Activation activation, int32_t *activation_min,
+ int32_t *activation_max);
+template void calculateActivationRange(Activation activation, int64_t *activation_min,
+ int64_t *activation_max);
+
+static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
+ const Tensor *output, int32_t *activation_min,
+ int32_t *activation_max)
+{
+ const float scale = output->scale();
+ const int32_t zero_point = output->zero_point();
+
+ auto quantize = [scale, zero_point](float x) {
+ return zero_point + static_cast<int32_t>(std::round(x / scale));
+ };
+
+ switch (activation)
+ {
+ case Activation::NONE:
+ case Activation::TANH:
+ *activation_min = qmin;
+ *activation_max = qmax;
+ break;
+ case Activation::RELU:
+ *activation_min = std::max(qmin, quantize(0.0f));
+ *activation_max = qmax;
+ break;
+ case Activation::RELU_N1_TO_1:
+ *activation_min = std::max(qmin, quantize(-1.0f));
+ *activation_max = std::min(qmax, quantize(1.0f));
+ break;
+ case Activation::RELU6:
+ *activation_min = std::max(qmin, quantize(0.0f));
+ *activation_max = std::min(qmax, quantize(6.0f));
+ break;
+ default:
+ throw std::runtime_error("Unsupported activation.");
+ }
+}
+
+void calculateActivationRangeQuantized(Activation activation, const Tensor *output,
+ int32_t *activation_min, int32_t *activation_max)
+{
+ assert(output->zero_points().size() == 1);
+ int32_t qmin{};
+ int32_t qmax{};
+ switch (output->element_type())
+ {
+ case DataType::U8:
+ qmin = 0;
+ qmax = std::numeric_limits<uint8_t>::max();
+ break;
+ case DataType::S8:
+ qmin = -std::numeric_limits<int8_t>::max();
+ qmax = std::numeric_limits<int8_t>::max();
+ break;
+ case DataType::S16:
+ // For now, assume that signed int16 type implies signed symmetric quantization.
+ assert(output->zero_point() == 0);
+ qmin = -std::numeric_limits<int16_t>::max();
+ qmax = std::numeric_limits<int16_t>::max();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+
+ calculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, activation_min,
+ activation_max);
+}
+
+void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
+{
+ if (double_multiplier == 0.0)
+ {
+ *quantized_multiplier = 0;
+ *shift = 0;
+ return;
+ }
+
+ const double q = std::frexp(double_multiplier, shift);
+ auto q_fixed = static_cast<int64_t>(std::round(q * (INT64_C(1) << 31)));
+
+ if (q_fixed == (INT64_C(1) << 31))
+ {
+ q_fixed /= 2;
+ ++*shift;
+ }
+ assert(q_fixed <= std::numeric_limits<int32_t>::max());
+ // A shift amount smaller than -31 would cause all bits to be shifted out
+ // and thus all results would be zero. We implement that instead with
+ // q_fixed==0, so as to avoid hitting issues with right-shift
+ // operations with shift amounts greater than 31. Note that this happens
+ // roughly when abs(double_multiplier) < 2^-31 and the present handling means
+ // that we're effectively flushing tiny double_multiplier's to zero.
+ // We could conceivably handle values in the range (roughly) [32, 63]
+ // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
+ // the present handling is just doing 'flush denormals to zero'. We could
+ // reconsider and actually generate nonzero denormals if a need arises.
+ if (*shift < -31)
+ {
+ *shift = 0;
+ q_fixed = 0;
+ }
+ *quantized_multiplier = static_cast<int32_t>(q_fixed);
+}
+
+void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
+ int *left_shift)
+{
+ assert(double_multiplier < 1.0);
+ assert(double_multiplier > 0.0);
+ int shift;
+ quantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
+ assert(shift <= 0);
+ *left_shift = shift;
+}
+
+Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_shape)
+{
+ const int num_input1_dims = input1_shape.num_dims();
+ const int num_input2_dims = input2_shape.num_dims();
+ const int num_out_dims = std::max(num_input1_dims, num_input2_dims);
+ Shape output_shape(num_out_dims);
+
+ for (int i = 0; i < num_out_dims; ++i)
+ {
+ const int32_t input1_dim = i < num_input1_dims ? input1_shape.dim(num_input1_dims - i - 1) : 1;
+ const int32_t input2_dim = i < num_input2_dims ? input2_shape.dim(num_input2_dims - i - 1) : 1;
+
+ bool need_broadcast = input1_dim != input2_dim;
+ bool can_broadcast = input1_dim == 1 || input2_dim == 1;
+ LUCI_INTERPRETER_CHECK(!need_broadcast || can_broadcast);
+
+ output_shape.dim(num_out_dims - i - 1) = std::max(input1_dim, input2_dim);
+ }
+
+ return output_shape;
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Utils.h b/compiler/luci-micro/luci-interpreter/src/kernels/Utils.h
new file mode 100644
index 000000000..ebeb20e66
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Utils.h
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_UTILS_H
+#define LUCI_INTERPRETER_KERNELS_UTILS_H
+
+#include "core/KernelParams.h"
+#include "luci_interpreter/core/Tensor.h"
+
+#include <tensorflow/lite/kernels/internal/types.h>
+
+#include <cassert>
+#include <cstdint>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+#define LUCI_INTERPRETER_CHECK(cond)                                                             \
+  if (!(cond))                                                                                   \
+    throw std::runtime_error(std::string(__FILE__) + ":" + std::to_string(__LINE__) + "(" +      \
+                             std::string(#cond) + ") was not true.");
+
+inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size,
+ int32_t filter_size, int32_t out_size)
+{
+ const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+ const int32_t padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
+ return padding > 0 ? padding : 0;
+}
+
+inline int32_t computePaddingWithOffset(int32_t stride, int32_t dilation_rate, int32_t in_size,
+ int32_t filter_size, int32_t out_size, int32_t *offset)
+{
+ int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+ int32_t total_padding = ((out_size - 1) * stride + effective_filter_size - in_size);
+ total_padding = total_padding > 0 ? total_padding : 0;
+ *offset = total_padding % 2;
+ return total_padding / 2;
+}
+
+inline int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t filter_size,
+ int32_t stride, int32_t dilation_rate = 1)
+{
+ const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+ switch (padding)
+ {
+ case Padding::SAME:
+ return (image_size + stride - 1) / stride;
+ case Padding::VALID:
+ return (image_size + stride - effective_filter_size) / stride;
+ default:
+ assert(false);
+ return 0;
+ }
+}
+
+inline int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3)
+{
+ return ((d0 * shape.dim(1) + d1) * shape.dim(2) + d2) * shape.dim(3) + d3;
+}
+
+template <typename T>
+void calculateActivationRange(Activation activation, T *activation_min, T *activation_max);
+
+void calculateActivationRangeQuantized(Activation activation, const Tensor *output,
+ int32_t *activation_min, int32_t *activation_max);
+
+template <typename T> constexpr bool one_of_types() { return false; }
+
+// Checks if T is equal to one of {U,Other} types
+template <typename T, typename U, typename... Other> constexpr bool one_of_types()
+{
+ return std::is_same<T, U>::value || one_of_types<T, Other...>();
+}
+
+/**
+ * Fills activation min and max parameters depending on given data type and activation
+ *
+ * T is a template parameter, so after optimization this code is left with only the required case
+ *
+ * @tparam T data type of arithmetic operation output tensor
+ * @param p tflite params to fill
+ * @param act luci_interpreter::Activation of arithmetic operation
+ */
+template <typename T>
+void fillArithmeticActivationRange(tflite::ArithmeticParams &p, Activation act)
+{
+  static_assert(one_of_types<T, float, int32_t, int64_t>(), "Unsupported dtype");
+
+  // NOTE the branches must be mutually exclusive: without 'else if' the float case
+  // would also fall into the trailing 'else' and overwrite the int64 range fields.
+  if (std::is_same<T, float>::value)
+    calculateActivationRange(act, &p.float_activation_min, &p.float_activation_max);
+  else if (std::is_same<T, int32_t>::value)
+    calculateActivationRange(act, &p.quantized_activation_min, &p.quantized_activation_max);
+  else
+    calculateActivationRange(act, &p.int64_activation_min, &p.int64_activation_max);
+}
+
+// Decompose a double multiplier into a Q0.31 int32 representation of its
+// significand, and shift representation of its exponent.
+//
+// Handles an arbitrary positive multiplier. The 'shift' output-value is
+// basically the 'floating-point exponent' of the multiplier:
+// Negative for a right-shift (when the multiplier is <1), positive for a
+// left-shift (when the multiplier is >1)
+void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);
+
+// Decompose a double multiplier into a Q0.31 int32 representation of its
+// significand, and shift representation of NEGATIVE its exponent ---
+// this is intended as a RIGHT-shift.
+//
+// Restricted to the case where the multiplier < 1 (and non-negative).
+void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
+ int *left_shift);
+
+Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_shape);
+
+inline double getQuantizedConvolutionMultipler(float input_scale, float filter_scale,
+ float output_scale)
+{
+ const double input_product_scale = static_cast<double>(input_scale * filter_scale);
+ LUCI_INTERPRETER_CHECK(input_product_scale >= 0);
+ return input_product_scale / static_cast<double>(output_scale);
+}
+
+// TODO rename getQuantizedConvolutionMultiplers to something more general
+// it is used for non conv operators too
+inline std::vector<double> getQuantizedConvolutionMultiplers(float input_scale,
+ const std::vector<float> &filter_scale,
+ float output_scale)
+{
+ std::vector<double> effective_output_scales;
+ size_t n = filter_scale.size();
+ effective_output_scales.reserve(n);
+ for (size_t i = 0; i < n; ++i)
+ {
+ effective_output_scales.push_back(
+ getQuantizedConvolutionMultipler(input_scale, filter_scale[i], output_scale));
+ }
+ return effective_output_scales;
+}
+
+struct ChannelQuantMultipliers
+{
+ int shift;
+ int32_t multiplier;
+ ChannelQuantMultipliers() = default;
+};
+
+inline std::vector<ChannelQuantMultipliers>
+quantizeMultipliers(const std::vector<double> &effective_scale)
+{
+ size_t n = effective_scale.size();
+ std::vector<ChannelQuantMultipliers> params(n);
+ for (size_t i = 0; i < n; ++i)
+ {
+ quantizeMultiplier(effective_scale[i], &params[i].multiplier, &params[i].shift);
+ }
+ return params;
+}
+
+// Helper wrapper to hide broadcast logic
+template <typename T> class BroadcastableWrapper
+{
+public:
+ BroadcastableWrapper(const std::vector<T> &v) : _v(v), _stride(v.size() == 1 ? 0 : 1) {}
+
+ T operator[](int idx) { return _v[idx * _stride]; }
+
+private:
+ const std::vector<T> &_v;
+ int _stride;
+};
+
+inline tflite::RuntimeShape getTensorShape(const Tensor *tensor)
+{
+ if (tensor == nullptr)
+ return tflite::RuntimeShape();
+
+ const Shape &shape = tensor->shape();
+ tflite::RuntimeShape runtime_shape(shape.num_dims());
+ for (int i = 0; i < shape.num_dims(); ++i)
+ {
+ runtime_shape.SetDim(i, shape.dim(i));
+ }
+ return runtime_shape;
+}
+
+template <typename T> const T *getTensorData(const Tensor *tensor)
+{
+ return tensor != nullptr ? tensor->data<T>() : nullptr;
+}
+
+template <typename T> T *getTensorData(Tensor *tensor)
+{
+ return tensor != nullptr ? tensor->data<T>() : nullptr;
+}
+
+// A list of tensors in a format that can be used by kernels like split and
+// concatenation.
+template <typename T, bool is_const> class VectorOfTensors
+{
+public:
+ using ElementT = typename std::conditional<is_const, const T, T>::type;
+ using TensorT = typename std::conditional<is_const, const Tensor, Tensor>::type;
+
+ // Build with the tensors in 'tensor_list'.
+ explicit VectorOfTensors(const std::vector<TensorT *> &tensor_list)
+ {
+ const int num_tensors = tensor_list.size();
+
+ all_data_.reserve(num_tensors);
+ all_shape_.reserve(num_tensors);
+ all_shape_ptr_.reserve(num_tensors);
+
+ for (TensorT *tensor : tensor_list)
+ {
+ all_data_.push_back(getTensorData<T>(tensor));
+ all_shape_.push_back(getTensorShape(tensor));
+ }
+
+ // Taking the pointer from inside a std::vector is only OK if the vector is
+ // never modified, so we populate all_shape in the previous loop and then we
+ // are free to grab iterators here.
+ for (tflite::RuntimeShape &shape : all_shape_)
+ {
+ all_shape_ptr_.push_back(&shape);
+ }
+ }
+ // Return a pointer to the data pointers of all tensors in the list. For
+ // example:
+ // float* const* f = v.data();
+ // f[0][1] is the second element of the first tensor.
+ ElementT *const *data() const { return all_data_.data(); }
+
+ // Return a pointer the shape pointers of all tensors in the list. For
+ // example:
+ // const RuntimeShape* const* d = v.dims();
+ // dims[1] are the dimensions of the second tensor in the list.
+ const tflite::RuntimeShape *const *shapes() const { return all_shape_ptr_.data(); }
+
+private:
+ std::vector<ElementT *> all_data_;
+ std::vector<tflite::RuntimeShape> all_shape_;
+ std::vector<tflite::RuntimeShape *> all_shape_ptr_;
+};
+
+// A list of quantized tensors in a format that can be used by kernels like
+// split and concatenation.
+template <bool is_const> class VectorOfQuantizedTensors : public VectorOfTensors<uint8_t, is_const>
+{
+public:
+ using typename VectorOfTensors<uint8_t, is_const>::TensorT;
+
+ // Build with the tensors in 'tensor_list'.
+ explicit VectorOfQuantizedTensors(const std::vector<TensorT *> &tensor_list)
+ : VectorOfTensors<uint8_t, is_const>(tensor_list)
+ {
+ for (TensorT *tensor : tensor_list)
+ {
+ zero_point_.push_back(tensor->zero_point());
+ scale_.push_back(tensor->scale());
+ }
+ }
+
+ const float *scale() const { return scale_.data(); }
+ const int32_t *zero_point() const { return zero_point_.data(); }
+
+private:
+ std::vector<int32_t> zero_point_;
+ std::vector<float> scale_;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_UTILS_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/While.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/While.cpp
new file mode 100644
index 000000000..153bd1a99
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/While.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/While.h"
+#include "kernels/Utils.h"
+
+#include <cstring>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+void copy(const std::vector<const Tensor *> &src, const std::vector<Tensor *> &dst)
+{
+ for (size_t i = 0; i < src.size(); ++i)
+ {
+ LUCI_INTERPRETER_CHECK(dst[i]->element_type() == src[i]->element_type());
+ dst[i]->resize(src[i]->shape());
+
+ const int32_t num_elements = src[i]->shape().num_elements();
+ const std::size_t element_size = getDataTypeSize(src[i]->element_type());
+ std::memcpy(dst[i]->data<void>(), src[i]->data<void>(), num_elements * element_size);
+ }
+}
+
+void copy(const std::vector<Tensor *> &src, const std::vector<Tensor *> &dst)
+{
+ std::vector<const Tensor *> const_src;
+ for (const auto &t : src)
+ const_src.push_back(t);
+ copy(const_src, dst);
+}
+
+// TODO: Think about how allocate memory for output in main graph
+void configureTensorsAllocations(const std::vector<Tensor *> &tensors, RuntimeGraph *run_graph)
+{
+ for (auto tensor : tensors)
+ run_graph->configureAllocations(tensor);
+}
+
+} // namespace
+
+While::While(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs,
+ RuntimeGraph *cond_graph, RuntimeGraph *body_graph)
+ : Kernel(std::move(inputs), std::move(outputs)), _cond_graph(cond_graph), _body_graph(body_graph)
+{
+}
+
+void While::configure()
+{
+  LUCI_INTERPRETER_CHECK(_body_graph->getInputTensors().size() == getInputTensors().size());
+  LUCI_INTERPRETER_CHECK(_body_graph->getOutputTensors().size() == getOutputTensors().size());
+  LUCI_INTERPRETER_CHECK(_body_graph->getOutputTensors().size() == getInputTensors().size());
+
+  LUCI_INTERPRETER_CHECK(_cond_graph->getInputTensors().size() == getInputTensors().size());
+
+  const auto &cond_outputs = _cond_graph->getOutputTensors();
+  LUCI_INTERPRETER_CHECK(cond_outputs.size() == 1);
+  LUCI_INTERPRETER_CHECK(cond_outputs[0]->element_type() == DataType::BOOL);
+}
+
+/**
+ * @note Dynamic shape such as {1, 0, 8} may fail in tensor->data()
+ */
+void While::execute() const
+{
+ const auto &cond_inputs = _cond_graph->getInputTensors();
+ const auto &cond_outputs = _cond_graph->getOutputTensors();
+
+ configureTensorsAllocations(cond_inputs, _cond_graph);
+
+ copy(getInputTensors(), cond_inputs);
+
+ const auto &body_inputs = _body_graph->getInputTensors();
+ const auto &body_outputs = _body_graph->getOutputTensors();
+
+ configureTensorsAllocations(body_inputs, _body_graph);
+
+ while (true)
+ {
+ _cond_graph->execute();
+
+ bool cond_value = cond_outputs[0]->data<bool>()[0];
+ if (!cond_value)
+ break;
+
+ copy(cond_inputs, body_inputs);
+
+ _body_graph->execute();
+
+ copy(body_outputs, cond_inputs);
+ }
+
+ copy(cond_inputs, getOutputTensors());
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/While.h b/compiler/luci-micro/luci-interpreter/src/kernels/While.h
new file mode 100644
index 000000000..f758df3f3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/While.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_WHILE_H
+#define LUCI_INTERPRETER_KERNELS_WHILE_H
+
+#include "core/Kernel.h"
+#include "core/RuntimeGraph.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class While : public Kernel
+{
+public:
+ While(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs, RuntimeGraph *cond_graph,
+ RuntimeGraph *body_graph);
+
+ const Tensor *input(int index) const { return _inputs[index]; }
+ Tensor *output(int index) const { return _outputs[index]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ RuntimeGraph *const _cond_graph = nullptr;
+ RuntimeGraph *const _body_graph = nullptr;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_WHILE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/While.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/While.test.cpp
new file mode 100644
index 000000000..cb8f89130
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/While.test.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "core/RuntimeModule.h"
+#include "kernels/Add.h"
+#include "kernels/Less.h"
+#include "kernels/While.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+RuntimeGraph *buildCondSubgraph(RuntimeModule *module, DataType dtype, Tensor *input_cond,
+ IMemoryManager *memory_manager)
+{
+ RuntimeGraph *graph = module->addGraph(memory_manager);
+ Tensor *input =
+ graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, ""));
+ Tensor *output =
+ graph->addTensor(std::make_unique<Tensor>(DataType::BOOL, Shape{}, AffineQuantization{}, ""));
+
+ memory_manager->allocate_memory(*input);
+ memory_manager->allocate_memory(*output);
+
+ graph->setInputTensors({input});
+ graph->setOutputTensors({output});
+
+ graph->addKernel(std::make_unique<Less>(input, input_cond, output));
+
+ return graph;
+}
+
+RuntimeGraph *buildBodySubgraph(RuntimeModule *module, DataType dtype, Tensor *input_add,
+ IMemoryManager *memory_manager)
+{
+ RuntimeGraph *graph = module->addGraph(memory_manager);
+ Tensor *input =
+ graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, ""));
+ Tensor *output =
+ graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, ""));
+
+ memory_manager->allocate_memory(*input);
+ memory_manager->allocate_memory(*output);
+
+ graph->setInputTensors({input});
+ graph->setOutputTensors({output});
+
+ AddParams params{};
+ params.activation = Activation::NONE;
+ graph->addKernel(std::make_unique<Add>(input, input_add, output, params));
+
+ return graph;
+}
+
+TEST(WhileTest, FloatLoop10)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input = makeInputTensor<DataType::FLOAT32>({1}, {1}, memory_manager.get());
+ Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor input_cond = makeInputTensor<DataType::FLOAT32>({1}, {10}, memory_manager.get());
+ Tensor input_add = makeInputTensor<DataType::FLOAT32>({1}, {1}, memory_manager.get());
+
+ RuntimeModule module(nullptr);
+ RuntimeGraph *cond_graph =
+ buildCondSubgraph(&module, DataType::FLOAT32, &input_cond, memory_manager.get());
+ RuntimeGraph *body_graph =
+ buildBodySubgraph(&module, DataType::FLOAT32, &input_add, memory_manager.get());
+
+ While kernel({&input}, {&output}, cond_graph, body_graph);
+ kernel.configure();
+ memory_manager->allocate_memory(output);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({10}));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/loader/CMakeLists.txt
new file mode 100644
index 000000000..292771592
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/CMakeLists.txt
@@ -0,0 +1,39 @@
+set(SOURCES
+ GraphLoader.h
+ GraphLoader.cpp
+ KernelBuilderHelper.h
+ KernelBuilderHelper.cpp
+ KernelBuilder.h
+ KernelBuilder.cpp
+ ModuleLoader.h
+ ModuleLoader.cpp
+ RuntimeToIR.h
+ nodes/Builders.h)
+
+# include kernel specific builders
+macro(REGISTER_KERNEL NODE)
+ list(APPEND SOURCES "nodes/${NODE}.cpp")
+endmacro(REGISTER_KERNEL)
+include(${KERNEL_REGISTER_FILE})
+
+add_library(${LUCI_INTERPRETER_LOADER} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_LOADER} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_PAL_DIR}")
+target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
+
+target_link_libraries(${LUCI_INTERPRETER_LOADER}
+ PUBLIC luci_lang ${LUCI_INTERPRETER_CORE}
+ PRIVATE ${LUCI_INTERPRETER_KERNELS} nncc_common luci_plan)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+set(TEST_SOURCES KernelBuilder.test.cpp)
+
+GTest_AddTest(${LUCI_INTERPRETER_LOADER}_test ${TEST_SOURCES})
+target_link_libraries(${LUCI_INTERPRETER_LOADER}_test ${LUCI_INTERPRETER_LOADER})
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.cpp
new file mode 100644
index 000000000..40207090b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.cpp
@@ -0,0 +1,344 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loader/GraphLoader.h"
+
+#include "loader/KernelBuilder.h"
+
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+#include <loco/IR/Algorithm.h>
+
+namespace luci_interpreter
+{
+namespace
+{
+
+template <typename NodeT> Shape getNodeShape(const NodeT *node)
+{
+ Shape shape(node->rank());
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ {
+ shape.dim(i) = node->dim(i).value();
+ }
+ return shape;
+}
+
+template <DataType DT> const void *getNodeDataImpl(const luci::CircleConst *node, size_t *data_size)
+{
+ const size_t element_size = getDataTypeSize(DT);
+ const int32_t num_elements = node->size<DT>();
+
+ *data_size = num_elements * element_size;
+ if (*data_size > 0)
+ {
+ // FIXME There is no good way to get the pointer to the data currently.
+ return &node->at<DT>(0);
+ }
+ return nullptr;
+}
+
+const void *getNodeData(const luci::CircleConst *node, size_t *data_size)
+{
+ switch (node->dtype())
+ {
+ case DataType::U8:
+ return getNodeDataImpl<DataType::U8>(node, data_size);
+ case DataType::FLOAT32:
+ return getNodeDataImpl<DataType::FLOAT32>(node, data_size);
+ case DataType::S8:
+ return getNodeDataImpl<DataType::S8>(node, data_size);
+ case DataType::S16:
+ return getNodeDataImpl<DataType::S16>(node, data_size);
+ case DataType::S32:
+ return getNodeDataImpl<DataType::S32>(node, data_size);
+ case DataType::S64:
+ return getNodeDataImpl<DataType::S64>(node, data_size);
+ case DataType::BOOL:
+ return getNodeDataImpl<DataType::BOOL>(node, data_size);
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+const void *getNodeData(const luci::CircleCustom *node, size_t *data_size)
+{
+ if (node->custom_code() != "CircleReferencingConst")
+ return nullptr;
+
+ // helper struct which describes data loaded to custom_options of CircleReferencingConst node
+ // TODO move this struct to header
+ struct ConstDataReference
+ {
+ const uint8_t *data = nullptr;
+ uint32_t size = 0;
+ };
+
+ const auto &custom_options = node->custom_options();
+ const auto &const_data_ref = *reinterpret_cast<const ConstDataReference *>(custom_options.data());
+
+ *data_size = const_data_ref.size;
+ return const_data_ref.data;
+}
+
+bool isExecutableNode(const luci::CircleNode *node)
+{
+ switch (node->opcode())
+ {
+ // These nodes denote inputs / outputs of a graph.
+ case luci::CircleOpcode::CIRCLECONST:
+ case luci::CircleOpcode::CIRCLEINPUT:
+ case luci::CircleOpcode::CIRCLEOUTPUT:
+ case luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE:
+ // The following nodes denote outputs of multiple-output nodes.
+ case luci::CircleOpcode::CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT:
+ case luci::CircleOpcode::CIRCLECUSTOMOUT:
+ case luci::CircleOpcode::CIRCLEIFOUT:
+ case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT:
+ case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT:
+ case luci::CircleOpcode::CIRCLESPLITOUT:
+ case luci::CircleOpcode::CIRCLESPLITVOUT:
+ case luci::CircleOpcode::CIRCLETOPKV2OUT:
+ case luci::CircleOpcode::CIRCLEUNIQUEOUT:
+ case luci::CircleOpcode::CIRCLEUNPACKOUT:
+ case luci::CircleOpcode::CIRCLEVARIABLE:
+ case luci::CircleOpcode::CIRCLEWHILEOUT:
+ return false;
+ // Custom nodes may be executable and non-executable
+ case luci::CircleOpcode::CUSTOM:
+ {
+ auto const custom_node = loco::must_cast<const luci::CircleCustom *>(node);
+
+ // TODO handle more non-executable Custom ops here
+ if (custom_node->custom_code() == "CircleReferencingConst")
+ return false;
+
+ return true;
+ }
+ default:
+ return true;
+ }
+}
+
+bool isTensorProducingNode(const luci::CircleNode *node)
+{
+ switch (node->opcode())
+ {
+ // Output nodes do not produce tensors.
+ case luci::CircleOpcode::CIRCLEOUTPUT:
+ // The following nodes are multiple-output nodes. They do not produce tensors, the tensors
+ // are produced by the corresponding *Out nodes instead.
+ case luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM:
+ case luci::CircleOpcode::CUSTOM:
+ case luci::CircleOpcode::IF:
+ case luci::CircleOpcode::NON_MAX_SUPPRESSION_V4:
+ case luci::CircleOpcode::NON_MAX_SUPPRESSION_V5:
+ case luci::CircleOpcode::SPLIT:
+ case luci::CircleOpcode::SPLIT_V:
+ case luci::CircleOpcode::TOPK_V2:
+ case luci::CircleOpcode::UNIQUE:
+ case luci::CircleOpcode::UNPACK:
+ case luci::CircleOpcode::WHILE:
+ return false;
+ default:
+ return true;
+ }
+}
+
+bool isSupportedCustomNode(const luci::CircleNode *node)
+{
+ const auto custom_node = loco::must_cast<const luci::CircleCustom *>(node);
+
+ // TODO handle more Custom ops here
+ if (custom_node->custom_code() == "CircleReferencingConst")
+ return true;
+
+ return false;
+}
+
+} // namespace
+
+GraphLoader::GraphLoader(
+ const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor, IMemoryManager *memory_manager)
+ : _graph(graph), _runtime_graph(runtime_graph), _runtime_to_ir(runtime_to_ir),
+ _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor),
+ _memory_manager(memory_manager)
+{
+}
+
+void GraphLoader::loadTensors()
+{
+ for (uint32_t i = 0; i < _graph->nodes()->size(); ++i)
+ {
+ const auto *node = loco::must_cast<const luci::CircleNode *>(_graph->nodes()->at(i));
+
+ if (node->opcode() == luci::CircleOpcode::CUSTOM && !isSupportedCustomNode(node))
+ throw std::runtime_error("Unsupported Custom operator. " + node->name());
+
+ if (!isTensorProducingNode(node))
+ continue;
+
+ // Only Input, Const, Custom and Variable nodes have shapes. Shapes of intermediate tensors will
+ // be inferred.
+ Shape shape{};
+ switch (node->opcode())
+ {
+ case luci::CircleOpcode::CIRCLECONST:
+ case luci::CircleOpcode::CIRCLECUSTOMOUT:
+ case luci::CircleOpcode::CIRCLEINPUT:
+ case luci::CircleOpcode::CIRCLEVARIABLE:
+ shape = getNodeShape(node);
+ break;
+ default:
+ break;
+ }
+
+ AffineQuantization quantization;
+ if (node->quantparam() != nullptr)
+ {
+ const luci::CircleQuantParam *params = node->quantparam();
+ assert(params->scale.size() == params->zerop.size());
+ quantization.scale.assign(params->scale.cbegin(), params->scale.cend());
+ quantization.zero_point.assign(params->zerop.cbegin(), params->zerop.cend());
+ quantization.quantized_dimension = params->quantized_dimension;
+ }
+
+ auto tensor = std::make_unique<Tensor>(node->dtype(), std::move(shape), std::move(quantization),
+ node->name());
+
+ // If node has execution plan then read memory offsets for nodes
+ // from the beginning of shared memory buffer. Used in Static Memory Manager.
+ if (luci::has_execution_plan(node))
+ {
+ auto execution_plan = luci::get_execution_plan(node);
+ assert(!execution_plan.offsets().empty());
+ tensor->set_offset(execution_plan.offsets().front());
+ }
+
+ if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node))
+ {
+ size_t data_size{};
+ const void *const_data = getNodeData(const_node, &data_size);
+ if (const_data != nullptr)
+ {
+ _memory_manager->allocate_memory(*tensor);
+ tensor->writeData(const_data, data_size);
+ }
+ }
+ else if (const auto *custom_out_node = dynamic_cast<const luci::CircleCustomOut *>(node))
+ {
+ const auto *custom_node =
+ loco::must_cast<const luci::CircleCustom *>(custom_out_node->input());
+
+ if (custom_node->custom_code() == "CircleReferencingConst")
+ {
+ size_t data_size{};
+ const void *const_data = getNodeData(custom_node, &data_size);
+ if (const_data != nullptr)
+ {
+ _memory_manager->allocate_memory(*tensor);
+ tensor->writeData(const_data, data_size);
+ }
+ }
+ }
+
+ _node_to_tensor.emplace(node, tensor.get());
+ _runtime_to_ir.tensor_to_node.emplace(tensor.get(), node);
+
+ _runtime_graph->addTensor(std::move(tensor));
+ }
+}
+
+void GraphLoader::initInputOutputTensors() const
+{
+ auto input_nodes = loco::input_nodes(_graph);
+ std::vector<Tensor *> input_tensors(input_nodes.size());
+ for (size_t i = 0; i < input_nodes.size(); ++i)
+ {
+ input_tensors[i] = _node_to_tensor.at(input_nodes[i]);
+ _memory_manager->allocate_memory(*input_tensors[i]);
+ }
+ _runtime_graph->setInputTensors(input_tensors);
+
+ auto output_nodes = loco::output_nodes(const_cast<loco::Graph *>(_graph));
+ std::vector<Tensor *> output_tensors(output_nodes.size());
+ for (size_t i = 0; i < output_nodes.size(); ++i)
+ {
+ const auto *node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
+ output_tensors[i] = _node_to_tensor.at(node->from());
+ }
+ _runtime_graph->setOutputTensors(output_tensors);
+}
+
+void GraphLoader::loadOperators()
+{
+ KernelBuilder kernel_builder(_graph_to_runtime_graph, _node_to_tensor);
+
+ // Create kernels for executable nodes. This has to be done in execution order.
+ auto graph = const_cast<loco::Graph *>(_graph);
+
+ auto const graph_nodes = loco::all_nodes(graph);
+
+ // Checking for execution plan in node annotations.
+ bool has_execution_annotation = true;
+ auto const checking_exec_plan = [&has_execution_annotation](auto const node) {
+ const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ if (!luci::has_execution_plan(circle_node))
+ has_execution_annotation = false;
+ };
+ std::for_each(begin(graph_nodes), end(graph_nodes), checking_exec_plan);
+
+ if (has_execution_annotation)
+ {
+ // Build ordered_nodes vector that stores the order of execution of graph nodes.
+ std::vector<const luci::CircleNode *> ordered_nodes(graph_nodes.size());
+
+ auto const filler = [&ordered_nodes](auto const node) {
+ const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ auto const position = luci::get_execution_plan(circle_node).order_in_plan();
+ ordered_nodes.at(position) = circle_node;
+ };
+ std::for_each(begin(graph_nodes), end(graph_nodes), filler);
+
+ for (auto node : ordered_nodes)
+ {
+ if (isExecutableNode(node))
+ {
+ std::unique_ptr<Kernel> kernel = kernel_builder.build(node);
+ _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
+ _runtime_graph->addKernel(std::move(kernel));
+ }
+ }
+ }
+ else
+ {
+ // If it is impossible to build the execution order plan,
+ // then we use the default postorder_traversal approach.
+ for (const loco::Node *loco_node : loco::postorder_traversal(loco::output_nodes(graph)))
+ {
+ const auto *node = loco::must_cast<const luci::CircleNode *>(loco_node);
+ if (isExecutableNode(node))
+ {
+ std::unique_ptr<Kernel> kernel = kernel_builder.build(node);
+ _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
+ _runtime_graph->addKernel(std::move(kernel));
+ }
+ }
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.h b/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.h
new file mode 100644
index 000000000..fe066ecf8
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
+#define LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
+
+#include "core/RuntimeGraph.h"
+#include "loader/RuntimeToIR.h"
+#include "luci_interpreter/MemoryManager.h"
+
+#include <loco/IR/Graph.h>
+
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class GraphLoader
+{
+public:
+ GraphLoader(const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
+ IMemoryManager *memory_manager);
+
+ void loadTensors();
+ void initInputOutputTensors() const;
+ void loadOperators();
+
+private:
+ const loco::Graph *_graph;
+ RuntimeGraph *_runtime_graph;
+ RuntimeToIR &_runtime_to_ir;
+ IMemoryManager *_memory_manager;
+
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph;
+ std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.cpp b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.cpp
new file mode 100644
index 000000000..8483a9a3d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loader/KernelBuilder.h"
+#include "loader/nodes/Builders.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+#define CIRCLE_NODE(OPCODE, CLASS) CLASS,
+#define CIRCLE_VNODE(OPCODE, CLASS) CLASS,
+
+// This enum is auxiliary.
+// It is a duplicate of luci::CircleOpcode but initialized with CLASS instead of OPCODE,
+// because the list of target operators is given as CLASS names
+enum class BuilderId
+{
+#include <luci/IR/CircleNodes.lst>
+  Size // equals the number of values in the BuilderId enum
+};
+
+#undef CIRCLE_VNODE
+#undef CIRCLE_NODE
+
+/**
+ * @brief Registry of kernel builders
+ *
+ * This class contains mapping from Opcodes to kernel builder functions
+ */
+
+class KernelBuilderRegistry
+{
+public:
+ using KernelBuilderFunc = std::unique_ptr<Kernel>(const luci::CircleNode *,
+ KernelBuilderHelper &);
+
+ KernelBuilderRegistry() : _operator_builders(size_t(BuilderId::Size), nullptr)
+ {
+#define REGISTER_KERNEL(name) \
+ register_kernel_builder(BuilderId::Circle##name, build_kernel_Circle##name);
+
+#include "KernelsToBuild.lst"
+
+#undef REGISTER_KERNEL
+ }
+
+ KernelBuilderFunc *get_kernel_builder_func(luci::CircleOpcode opcode) const
+ {
+ return _operator_builders.at(size_t(opcode));
+ }
+
+private:
+ std::vector<KernelBuilderFunc *> _operator_builders;
+
+ void register_kernel_builder(BuilderId id, KernelBuilderFunc *func)
+ {
+    // Since BuilderId is a duplicate of luci::CircleOpcode,
+    // size_t(id) is equal to size_t(the corresponding operation's opcode).
+ assert(size_t(id) < _operator_builders.size());
+ _operator_builders[size_t(id)] = func;
+ }
+};
+
+KernelBuilder::KernelBuilder(
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+ : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor)
+{
+ _builder_registry = std::make_unique<KernelBuilderRegistry>();
+}
+
+KernelBuilder::~KernelBuilder()
+{
+ // Need to define in this CPP to hide KernelBuilderRegistry internals.
+ // This destructor deletes _builder_registry
+}
+
+std::unique_ptr<Kernel> KernelBuilder::build(const luci::CircleNode *node)
+{
+ auto specific_builder = _builder_registry->get_kernel_builder_func(node->opcode());
+ if (specific_builder != nullptr)
+ return specific_builder(node, *this);
+
+ std::string msg = "Unsupported operator: ";
+ msg += std::to_string(static_cast<uint32_t>(node->opcode())) + " " + std::string(node->name());
+ throw std::invalid_argument(msg.c_str());
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.h b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.h
new file mode 100644
index 000000000..b1f383394
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_KERNELBUILDER_H
+#define LUCI_INTERPRETER_LOADER_KERNELBUILDER_H
+
+#include "loader/KernelBuilderHelper.h"
+
+#include "core/Kernel.h"
+#include "core/RuntimeGraph.h"
+
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <memory>
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class KernelBuilderRegistry;
+
+class KernelBuilder : public KernelBuilderHelper
+{
+public:
+ KernelBuilder(
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor);
+
+ ~KernelBuilder();
+
+ std::unique_ptr<Kernel> build(const luci::CircleNode *node);
+
+private:
+ std::unique_ptr<KernelBuilderRegistry> _builder_registry;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_KERNELBUILDER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.test.cpp b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.test.cpp
new file mode 100644
index 000000000..b221b6921
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.test.cpp
@@ -0,0 +1,1376 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loader/GraphLoader.h"
+#include "loader/KernelBuilder.h"
+#include "luci_interpreter/SimpleMemoryManager.h"
+
+#include <kernels/Add.h>
+#include <kernels/ArgMax.h>
+#include <kernels/AveragePool2D.h>
+#include <kernels/BatchMatMul.h>
+#include <kernels/Cast.h>
+#include <kernels/Concatenation.h>
+#include <kernels/Conv2D.h>
+#include <kernels/DepthToSpace.h>
+#include <kernels/DepthwiseConv2D.h>
+#include <kernels/Div.h>
+#include <kernels/Elu.h>
+#include <kernels/Exp.h>
+#include <kernels/Floor.h>
+#include <kernels/FloorDiv.h>
+#include <kernels/Equal.h>
+#include <kernels/FullyConnected.h>
+#include <kernels/Greater.h>
+#include <kernels/GreaterEqual.h>
+#include <kernels/InstanceNorm.h>
+#include <kernels/L2Normalize.h>
+#include <kernels/L2Pool2D.h>
+#include <kernels/LeakyRelu.h>
+#include <kernels/Less.h>
+#include <kernels/LessEqual.h>
+#include <kernels/LocalResponseNormalization.h>
+#include <kernels/LogicalAnd.h>
+#include <kernels/LogicalNot.h>
+#include <kernels/LogicalOr.h>
+#include <kernels/Logistic.h>
+#include <kernels/LogSoftmax.h>
+#include <kernels/Maximum.h>
+#include <kernels/MaxPool2D.h>
+#include <kernels/Mean.h>
+#include <kernels/Minimum.h>
+#include <kernels/Mul.h>
+#include <kernels/Neg.h>
+#include <kernels/NotEqual.h>
+#include <kernels/OneHot.h>
+#include <kernels/Pad.h>
+#include <kernels/PadV2.h>
+#include <kernels/Pow.h>
+#include <kernels/PRelu.h>
+#include <kernels/Relu.h>
+#include <kernels/Relu6.h>
+#include <kernels/Reshape.h>
+#include <kernels/ResizeBilinear.h>
+#include <kernels/ResizeNearestNeighbor.h>
+#include <kernels/ReverseV2.h>
+#include <kernels/Rsqrt.h>
+#include <kernels/Slice.h>
+#include <kernels/Softmax.h>
+#include <kernels/SpaceToDepth.h>
+#include <kernels/Split.h>
+#include <kernels/SplitV.h>
+#include <kernels/Sqrt.h>
+#include <kernels/SquaredDifference.h>
+#include <kernels/Squeeze.h>
+#include <kernels/StridedSlice.h>
+#include <kernels/Sub.h>
+#include <kernels/Tanh.h>
+#include <kernels/Transpose.h>
+#include <kernels/TransposeConv.h>
+#include <kernels/Unpack.h>
+
+#include <gmock/gmock.h>
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class KernelBuilderTest : public Test
+{
+protected:
+ luci::CircleInput *createInputNode() { return createNode<luci::CircleInput>(); }
+ void SetUp() override { _memory_manager = std::make_unique<SimpleMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+
+ template <typename NodeT, typename... Args> NodeT *createNode(Args &&... args)
+ {
+ auto *node = _graph.nodes()->create<NodeT>(std::forward<Args>(args)...);
+ // The actual type does not matter for the purpose of the tests.
+ // NOTE The type is meaningless for nodes with multiple outputs (corresponding *Out nodes carry
+ // actual output types).
+ node->dtype(loco::DataType::FLOAT32);
+ return node;
+ }
+
+ template <typename NodeOutT> NodeOutT *createNodeOut(loco::Node *node, int index)
+ {
+ auto *node_out = createNode<NodeOutT>();
+ node_out->input(node);
+ node_out->index(index);
+ return node_out;
+ }
+
+ template <typename KernelT> std::unique_ptr<KernelT> buildKernel(const luci::CircleNode *op)
+ {
+ std::unordered_map<const loco::Graph *, RuntimeGraph *> graph_to_runtime_graph;
+
+ RuntimeGraph runtime_graph(nullptr, _memory_manager.get());
+ graph_to_runtime_graph[&_graph] = &runtime_graph;
+ RuntimeToIR runtime_to_ir;
+ GraphLoader graph_loader(&_graph, &runtime_graph, runtime_to_ir, graph_to_runtime_graph,
+ _node_to_tensor, _memory_manager.get());
+ graph_loader.loadTensors();
+
+ KernelBuilder kernel_builder(graph_to_runtime_graph, _node_to_tensor);
+
+ auto kernel = kernel_builder.build(op);
+ return std::unique_ptr<KernelT>(dynamic_cast<KernelT *>(kernel.release()));
+ }
+
+ void checkTensor(const Tensor *tensor, const loco::Node *node)
+ {
+ EXPECT_THAT(tensor, Eq(_node_to_tensor.at(node)));
+ }
+
+private:
+ loco::Graph _graph;
+ std::unordered_map<const loco::Node *, Tensor *> _node_to_tensor;
+};
+
+TEST_F(KernelBuilderTest, Add)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleAdd>();
+ op->x(input1);
+ op->y(input2);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Add>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, ArgMax)
+{
+ auto *input = createInputNode();
+ auto *axis = createInputNode();
+
+ auto *op = createNode<luci::CircleArgMax>();
+ op->input(input);
+ op->dimension(axis);
+
+ op->output_type(loco::DataType::FLOAT32);
+
+ auto kernel = buildKernel<kernels::ArgMax>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->axis(), axis);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().output_type, Eq(op->output_type()));
+}
+
+TEST_F(KernelBuilderTest, AveragePool2D)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleAveragePool2D>();
+ op->value(input);
+
+ op->padding(luci::Padding::SAME);
+ op->filter()->h(11);
+ op->filter()->w(13);
+ op->stride()->h(17);
+ op->stride()->w(19);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::AveragePool2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h()));
+ EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, BatchMatMul)
+{
+ auto *lhs = createInputNode();
+ auto *rhs = createInputNode();
+
+ auto *op = createNode<luci::CircleBatchMatMul>();
+ op->x(lhs);
+ op->y(rhs);
+ op->adj_x(false);
+ op->adj_y(false);
+
+ auto kernel = buildKernel<kernels::BatchMatMul>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), lhs);
+ checkTensor(kernel->y(), rhs);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().adj_x, Eq(op->adj_x()));
+ EXPECT_THAT(kernel->params().adj_y, Eq(op->adj_y()));
+}
+
+TEST_F(KernelBuilderTest, Cast)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleCast>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Cast>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Concatenation)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleConcatenation>(2);
+ op->values(0, input1);
+ op->values(1, input2);
+ op->axis(11);
+
+ auto kernel = buildKernel<kernels::Concatenation>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(0), input1);
+ checkTensor(kernel->input(1), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Conv2D)
+{
+ auto *input = createInputNode();
+ auto *filter = createInputNode();
+ auto *bias = createInputNode();
+
+ auto *op = createNode<luci::CircleConv2D>();
+ op->input(input);
+ op->filter(filter);
+ op->bias(bias);
+
+ op->padding(luci::Padding::SAME);
+ op->stride()->h(11);
+ op->stride()->w(13);
+ op->dilation()->h(17);
+ op->dilation()->w(19);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Conv2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->filter(), filter);
+ checkTensor(kernel->bias(), bias);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().dilation_height_factor, Eq(op->dilation()->h()));
+ EXPECT_THAT(kernel->params().dilation_width_factor, Eq(op->dilation()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, DepthToSpace)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleDepthToSpace>();
+ op->input(input);
+
+ op->block_size(11);
+
+ auto kernel = buildKernel<kernels::DepthToSpace>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().block_size, Eq(op->block_size()));
+}
+
+TEST_F(KernelBuilderTest, DepthwiseConv2D)
+{
+ auto *input = createInputNode();
+ auto *filter = createInputNode();
+ auto *bias = createInputNode();
+
+ auto *op = createNode<luci::CircleDepthwiseConv2D>();
+ op->input(input);
+ op->filter(filter);
+ op->bias(bias);
+
+ op->padding(luci::Padding::SAME);
+ op->depthMultiplier(11);
+ op->stride()->h(13);
+ op->stride()->w(17);
+ op->dilation()->h(19);
+ op->dilation()->w(23);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::DepthwiseConv2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->filter(), filter);
+ checkTensor(kernel->bias(), bias);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().depth_multiplier, Eq(op->depthMultiplier()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().dilation_height_factor, Eq(op->dilation()->h()));
+ EXPECT_THAT(kernel->params().dilation_width_factor, Eq(op->dilation()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Div)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleDiv>();
+ op->x(input1);
+ op->y(input2);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Div>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Elu)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleElu>();
+ op->features(input);
+
+ auto kernel = buildKernel<kernels::Elu>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Exp)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleExp>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Exp>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Floor)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleFloor>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Floor>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, FloorDiv)
+{
+ auto *x = createInputNode();
+ auto *y = createInputNode();
+
+ auto *op = createNode<luci::CircleFloorDiv>();
+ op->x(x);
+ op->y(y);
+
+ auto kernel = buildKernel<kernels::FloorDiv>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x);
+ checkTensor(kernel->y(), y);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Equal)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleEqual>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::Equal>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, FullyConnected)
+{
+ auto *input = createInputNode();
+ auto *weights = createInputNode();
+ auto *bias = createInputNode();
+
+ auto *op = createNode<luci::CircleFullyConnected>();
+ op->input(input);
+ op->weights(weights);
+ op->bias(bias);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::FullyConnected>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->weights(), weights);
+ checkTensor(kernel->bias(), bias);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Greater)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleGreater>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::Greater>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, GreaterEqual)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleGreaterEqual>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::GreaterEqual>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, InstanceNorm)
+{
+ auto *input = createInputNode();
+ auto *gamma = createInputNode();
+ auto *beta = createInputNode();
+
+ auto *op = createNode<luci::CircleInstanceNorm>();
+ op->input(input);
+ op->gamma(gamma);
+ op->beta(beta);
+
+ op->epsilon(1e-05);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::InstanceNorm>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->gamma(), gamma);
+ checkTensor(kernel->beta(), beta);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().epsilon, Eq(op->epsilon()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, L2Normalize)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleL2Normalize>();
+ op->x(input);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::L2Normalize>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, L2Pool2D)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleL2Pool2D>();
+ op->value(input);
+
+ op->padding(luci::Padding::SAME);
+ op->filter()->h(11);
+ op->filter()->w(13);
+ op->stride()->h(17);
+ op->stride()->w(19);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::L2Pool2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h()));
+ EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, LeakyRelu)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleLeakyRelu>();
+ op->features(input);
+
+ op->alpha(11.0f);
+
+ auto kernel = buildKernel<kernels::LeakyRelu>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().alpha, Eq(op->alpha()));
+}
+
+TEST_F(KernelBuilderTest, Less)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleLess>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::Less>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, LessEqual)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleLessEqual>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::LessEqual>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, LocalResponseNormalization)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleLocalResponseNormalization>();
+ op->input(input);
+
+ op->radius(11);
+ op->bias(13.0f);
+ op->alpha(15.0f);
+ op->beta(17.0f);
+
+ auto kernel = buildKernel<kernels::LocalResponseNormalization>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().radius, Eq(op->radius()));
+ EXPECT_THAT(kernel->params().bias, Eq(op->bias()));
+ EXPECT_THAT(kernel->params().alpha, Eq(op->alpha()));
+ EXPECT_THAT(kernel->params().beta, Eq(op->beta()));
+}
+
+TEST_F(KernelBuilderTest, LogicalAnd)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleLogicalAnd>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::LogicalAnd>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, LogicalNot)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleLogicalNot>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::LogicalNot>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, LogicalOr)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleLogicalOr>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::LogicalOr>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Logistic)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleLogistic>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Logistic>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, LogSoftmax)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleLogSoftmax>();
+ op->logits(input);
+
+ auto kernel = buildKernel<kernels::LogSoftmax>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Maximum)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleMaximum>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::Maximum>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, MaxPool2D)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleMaxPool2D>();
+ op->value(input);
+
+ op->padding(luci::Padding::SAME);
+ op->filter()->h(11);
+ op->filter()->w(13);
+ op->stride()->h(17);
+ op->stride()->w(19);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::MaxPool2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h()));
+ EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Mean)
+{
+ auto *input = createInputNode();
+ auto *axes = createInputNode();
+
+ auto *op = createNode<luci::CircleMean>();
+ op->input(input);
+ op->reduction_indices(axes);
+
+ op->keep_dims(true);
+
+ auto kernel = buildKernel<kernels::Mean>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->axes(), axes);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().keep_dims, Eq(op->keep_dims()));
+}
+
+TEST_F(KernelBuilderTest, Minimum)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleMinimum>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::Minimum>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Mul)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleMul>();
+ op->x(input1);
+ op->y(input2);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Mul>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Neg)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleNeg>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Neg>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, NotEqual)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleNotEqual>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::NotEqual>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, OneHot)
+{
+ auto *indices = createInputNode();
+ auto *depth = createInputNode();
+ auto *on_value = createInputNode();
+ auto *off_value = createInputNode();
+ auto axis = 1;
+
+ auto *op = createNode<luci::CircleOneHot>();
+ op->indices(indices);
+ op->depth(depth);
+ op->on_value(on_value);
+ op->off_value(off_value);
+ op->axis(axis);
+
+ auto kernel = buildKernel<kernels::OneHot>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->indices(), indices);
+ checkTensor(kernel->depth(), depth);
+ checkTensor(kernel->on_value(), on_value);
+ checkTensor(kernel->off_value(), off_value);
+ EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
+}
+
+TEST_F(KernelBuilderTest, Pad)
+{
+ auto *input = createInputNode();
+ auto *paddings = createInputNode();
+
+ auto *op = createNode<luci::CirclePad>();
+ op->input(input);
+ op->paddings(paddings);
+
+ auto kernel = buildKernel<kernels::Pad>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->paddings(), paddings);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, PadV2)
+{
+ auto *input = createInputNode();
+ auto *paddings = createInputNode();
+ auto *constant_values = createInputNode();
+
+ auto *op = createNode<luci::CirclePadV2>();
+ op->input(input);
+ op->paddings(paddings);
+ op->constant_values(constant_values);
+
+ auto kernel = buildKernel<kernels::PadV2>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->paddings(), paddings);
+ checkTensor(kernel->constant_values(), constant_values);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Pow)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CirclePow>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::Pow>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, PRelu)
+{
+ auto *input = createInputNode();
+ auto *alpha = createInputNode();
+
+ auto *op = createNode<luci::CirclePRelu>();
+ op->input(input);
+ op->alpha(alpha);
+
+ auto kernel = buildKernel<kernels::PRelu>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->alpha(), alpha);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Relu)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleRelu>();
+ op->features(input);
+
+ auto kernel = buildKernel<kernels::Relu>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Relu6)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleRelu6>();
+ op->features(input);
+
+ auto kernel = buildKernel<kernels::Relu6>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Reshape)
+{
+ auto *input = createInputNode();
+ auto *shape = createInputNode();
+
+ auto *op = createNode<luci::CircleReshape>();
+ op->tensor(input);
+ op->shape(shape);
+
+ auto kernel = buildKernel<kernels::Reshape>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->shape(), shape);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, ResizeBilinear)
+{
+ auto *input = createInputNode();
+ auto *size = createInputNode();
+
+ auto *op = createNode<luci::CircleResizeBilinear>();
+ op->input(input);
+ op->size(size);
+ op->align_corners(true);
+ op->half_pixel_centers(true);
+
+ auto kernel = buildKernel<kernels::ResizeBilinear>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->size(), size);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().align_corners, Eq(op->align_corners()));
+ EXPECT_THAT(kernel->params().half_pixel_centers, Eq(op->half_pixel_centers()));
+}
+
+TEST_F(KernelBuilderTest, ResizeNearestNeighbor)
+{
+ auto *input = createInputNode();
+ auto *size = createInputNode();
+
+ auto *op = createNode<luci::CircleResizeNearestNeighbor>();
+ op->input(input);
+ op->size(size);
+ op->align_corners(true);
+
+ auto kernel = buildKernel<kernels::ResizeNearestNeighbor>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->size(), size);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().align_corners, Eq(op->align_corners()));
+  // TODO CircleResizeNearestNeighbor does not yet expose half_pixel_centers;
+  // once it is added, also check kernel->params().half_pixel_centers here.
+}
+
+TEST_F(KernelBuilderTest, ReverseV2)
+{
+ auto *input = createInputNode();
+ auto *axes = createInputNode();
+
+ auto *op = createNode<luci::CircleReverseV2>();
+ op->tensor(input);
+ op->axis(axes);
+
+ auto kernel = buildKernel<kernels::ReverseV2>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->axes(), axes);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Rsqrt)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleRsqrt>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Rsqrt>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Slice)
+{
+ auto *input = createInputNode();
+ auto *begin = createInputNode();
+ auto *size = createInputNode();
+
+ auto *op = createNode<luci::CircleSlice>();
+ op->input(input);
+ op->begin(begin);
+ op->size(size);
+
+ auto kernel = buildKernel<kernels::Slice>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->begin(), begin);
+ checkTensor(kernel->size(), size);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Softmax)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleSoftmax>();
+ op->logits(input);
+
+ op->beta(11.0f);
+
+ auto kernel = buildKernel<kernels::Softmax>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().beta, Eq(op->beta()));
+}
+
+TEST_F(KernelBuilderTest, SpaceToDepth)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleSpaceToDepth>();
+ op->input(input);
+
+ op->block_size(11);
+
+ auto kernel = buildKernel<kernels::SpaceToDepth>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+  EXPECT_THAT(kernel->params().block_size, Eq(op->block_size()));
+}
+
+TEST_F(KernelBuilderTest, Split)
+{
+ auto *axis = createInputNode();
+ auto *input = createInputNode();
+ auto *op = createNode<luci::CircleSplit>();
+ auto *output1 = createNodeOut<luci::CircleSplitOut>(op, 0);
+ auto *output2 = createNodeOut<luci::CircleSplitOut>(op, 1);
+
+ op->split_dim(axis);
+ op->input(input);
+
+ op->num_split(2);
+
+ auto kernel = buildKernel<kernels::Split>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->axis(), axis);
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(0), output1);
+ checkTensor(kernel->output(1), output2);
+}
+
+TEST_F(KernelBuilderTest, SplitV)
+{
+ auto *input = createInputNode();
+ auto *size_splits = createInputNode();
+ auto *axis = createInputNode();
+ auto *op = createNode<luci::CircleSplitV>();
+ auto *output0 = createNodeOut<luci::CircleSplitVOut>(op, 0);
+ auto *output1 = createNodeOut<luci::CircleSplitVOut>(op, 1);
+
+ op->input(input);
+ op->size_splits(size_splits);
+ op->split_dim(axis);
+
+ op->num_split(2);
+
+ auto kernel = buildKernel<kernels::SplitV>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->size_splits(), size_splits);
+ checkTensor(kernel->axis(), axis);
+ checkTensor(kernel->output(0), output0);
+ checkTensor(kernel->output(1), output1);
+}
+
+TEST_F(KernelBuilderTest, Sqrt)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleSqrt>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Sqrt>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, SquaredDifference)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleSquaredDifference>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::SquaredDifference>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Squeeze)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleSqueeze>();
+ op->input(input);
+
+ op->squeeze_dims({11, 13});
+
+ auto kernel = buildKernel<kernels::Squeeze>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().squeeze_dims, ElementsAreArray(op->squeeze_dims()));
+}
+
+TEST_F(KernelBuilderTest, StridedSlice)
+{
+ auto *input = createInputNode();
+ auto *begin = createInputNode();
+ auto *end = createInputNode();
+ auto *strides = createInputNode();
+
+ auto *op = createNode<luci::CircleStridedSlice>();
+ op->input(input);
+ op->begin(begin);
+ op->end(end);
+ op->strides(strides);
+
+ op->begin_mask(11);
+ op->ellipsis_mask(13);
+ op->end_mask(17);
+ op->new_axis_mask(19);
+ op->shrink_axis_mask(23);
+
+ auto kernel = buildKernel<kernels::StridedSlice>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->begin(), begin);
+ checkTensor(kernel->end(), end);
+ checkTensor(kernel->strides(), strides);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().begin_mask, Eq(op->begin_mask()));
+ EXPECT_THAT(kernel->params().ellipsis_mask, Eq(op->ellipsis_mask()));
+ EXPECT_THAT(kernel->params().end_mask, Eq(op->end_mask()));
+ EXPECT_THAT(kernel->params().new_axis_mask, Eq(op->new_axis_mask()));
+ EXPECT_THAT(kernel->params().shrink_axis_mask, Eq(op->shrink_axis_mask()));
+}
+
+TEST_F(KernelBuilderTest, Sub)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleSub>();
+ op->x(input1);
+ op->y(input2);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Sub>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Tanh)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleTanh>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Tanh>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Transpose)
+{
+ auto *input = createInputNode();
+ auto *perm = createInputNode();
+
+ auto *op = createNode<luci::CircleTranspose>();
+ op->a(input);
+ op->perm(perm);
+
+ auto kernel = buildKernel<kernels::Transpose>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->perm(), perm);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, TransposeConv)
+{
+ auto *output_shape = createInputNode();
+ auto *filter = createInputNode();
+ auto *input = createInputNode();
+ auto *bias = createInputNode();
+
+ auto *op = createNode<luci::CircleTransposeConv>();
+ op->inputSizes(output_shape);
+ op->filter(filter);
+ op->outBackprop(input);
+ op->bias(bias);
+
+ op->padding(luci::Padding::SAME);
+ op->stride()->h(11);
+ op->stride()->w(13);
+
+ auto kernel = buildKernel<kernels::TransposeConv>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->output_shape(), output_shape);
+ checkTensor(kernel->filter(), filter);
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ checkTensor(kernel->bias(), bias);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+}
+
+TEST_F(KernelBuilderTest, Unpack)
+{
+ auto *input = createInputNode();
+ auto *op = createNode<luci::CircleUnpack>();
+ auto *output1 = createNodeOut<luci::CircleUnpackOut>(op, 0);
+ auto *output2 = createNodeOut<luci::CircleUnpackOut>(op, 1);
+
+ op->value(input);
+
+ op->num(2);
+ op->axis(11);
+
+ auto kernel = buildKernel<kernels::Unpack>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(0), output1);
+ checkTensor(kernel->output(1), output2);
+ EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
+}
+
+TEST_F(KernelBuilderTest, NonExisting1_NEG)
+{
+ auto *op = createNode<luci::CircleConst>();
+ ASSERT_ANY_THROW(buildKernel<Kernel>(op));
+}
+
+TEST_F(KernelBuilderTest, NonExisting2_NEG)
+{
+ auto *op = createNode<luci::CircleInput>();
+ ASSERT_ANY_THROW(buildKernel<Kernel>(op));
+}
+
+TEST_F(KernelBuilderTest, NonExisting3_NEG)
+{
+ auto *op = createNode<luci::CircleOutput>();
+ ASSERT_ANY_THROW(buildKernel<Kernel>(op));
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.cpp b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.cpp
new file mode 100644
index 000000000..23c96a6db
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loader/KernelBuilderHelper.h"
+
+#include <luci/IR/Nodes/CircleOutput.h>
+
+namespace luci_interpreter
+{
+
+const Tensor *KernelBuilderHelper::getInputTensor(const loco::Node *node) const
+{
+ const Tensor *tensor = _node_to_tensor.at(node);
+ assert(tensor != nullptr);
+ return tensor;
+}
+
+const Tensor *KernelBuilderHelper::getOptionalInputTensor(const loco::Node *node) const
+{
+ if (dynamic_cast<const luci::CircleOutputExclude *>(node))
+ {
+ return nullptr;
+ }
+ return getInputTensor(node);
+}
+
+Tensor *KernelBuilderHelper::getOutputTensor(const loco::Node *node) const
+{
+ Tensor *tensor = _node_to_tensor.at(node);
+ assert(tensor != nullptr);
+ return tensor;
+}
+
+std::vector<Tensor *>
+KernelBuilderHelper::getOutputTensors(const std::vector<const loco::Node *> &nodes) const
+{
+ std::vector<Tensor *> tensors;
+ tensors.reserve(nodes.size());
+ for (const loco::Node *node : nodes)
+ tensors.push_back(getOutputTensor(node));
+ return tensors;
+}
+
+RuntimeGraph *KernelBuilderHelper::getRuntimeGraph(const loco::Graph *graph) const
+{
+ RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph);
+ assert(runtime_graph != nullptr);
+ return runtime_graph;
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.h b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.h
new file mode 100644
index 000000000..d6fb253b1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H
+#define LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H
+
+#include "core/Kernel.h"
+#include "core/RuntimeGraph.h"
+
+#include <loco/IR/Graph.h>
+#include <loco/IR/Node.h>
+
+#include <vector>
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class KernelBuilderHelper
+{
+public:
+ KernelBuilderHelper(
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+ : _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor)
+ {
+ }
+
+public:
+ const Tensor *getInputTensor(const loco::Node *node) const;
+ const Tensor *getOptionalInputTensor(const loco::Node *node) const;
+
+ Tensor *getOutputTensor(const loco::Node *node) const;
+ std::vector<Tensor *> getOutputTensors(const std::vector<const loco::Node *> &nodes) const;
+
+ RuntimeGraph *getRuntimeGraph(const loco::Graph *graph) const;
+
+public:
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph() const
+ {
+ return _graph_to_runtime_graph;
+ }
+
+ const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor() const
+ {
+ return _node_to_tensor;
+ }
+
+private:
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph;
+ const std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
+};
+
+template <typename CircleNodeOut>
+std::vector<const loco::Node *> collectOutputNodes(const loco::Node *node)
+{
+ std::vector<const CircleNodeOut *> output_nodes;
+ for (const loco::Node *loco_node : loco::succs(node))
+ {
+ output_nodes.push_back(loco::must_cast<const CircleNodeOut *>(loco_node));
+ }
+ std::sort(output_nodes.begin(), output_nodes.end(),
+ [](const CircleNodeOut *node1, const CircleNodeOut *node2) {
+ return node1->index() < node2->index();
+ });
+ return {output_nodes.cbegin(), output_nodes.cend()};
+}
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.cpp b/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.cpp
new file mode 100644
index 000000000..2f278b087
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModuleLoader.h"
+
+#include "GraphLoader.h"
+
+namespace luci_interpreter
+{
+
+ModuleLoader::ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module,
+ RuntimeToIR &runtime_to_ir,
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
+ IMemoryManager *memory_manager)
+ : _module(module), _runtime_module(runtime_module), _runtime_to_ir(runtime_to_ir),
+ _node_to_tensor(node_to_tensor), _memory_manager(memory_manager)
+{
+}
+
+void ModuleLoader::load()
+{
+ // Runtime graphs have to be created in advance, because they will be needed during the loading
+ // process for control flow nodes.
+ for (size_t i = 0; i < _module->size(); ++i)
+ {
+ _graph_to_runtime_graph.emplace(_module->graph(i), _runtime_module->addGraph(_memory_manager));
+ }
+ for (size_t i = 0; i < _module->size(); ++i)
+ {
+ const loco::Graph *graph = _module->graph(i);
+ RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph);
+ GraphLoader loader(graph, runtime_graph, _runtime_to_ir, _graph_to_runtime_graph,
+ _node_to_tensor, _memory_manager);
+ loader.loadTensors();
+ loader.initInputOutputTensors();
+ loader.loadOperators();
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.h b/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.h
new file mode 100644
index 000000000..11326a2ee
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_MODULELOADER_H
+#define LUCI_INTERPRETER_LOADER_MODULELOADER_H
+
+#include "core/RuntimeModule.h"
+#include "loader/RuntimeToIR.h"
+#include "luci_interpreter/MemoryManager.h"
+
+#include <luci/IR/Module.h>
+
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class ModuleLoader
+{
+public:
+ ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module,
+ RuntimeToIR &runtime_to_ir,
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
+ IMemoryManager *memory_manager);
+
+ void load();
+
+private:
+  const luci::Module *_module;
+  RuntimeModule *_runtime_module;
+  RuntimeToIR &_runtime_to_ir;
+  std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
+  IMemoryManager *_memory_manager;
+ std::unordered_map<const loco::Graph *, RuntimeGraph *> _graph_to_runtime_graph;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_MODULELOADER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/RuntimeToIR.h b/compiler/luci-micro/luci-interpreter/src/loader/RuntimeToIR.h
new file mode 100644
index 000000000..9ea8b1fa2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/RuntimeToIR.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H
+#define LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H
+
+#include "luci_interpreter/core/Tensor.h"
+
+#include <luci/IR/CircleNode.h>
+
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+// Maps runtime entities back to IR entities. It is used to implement observing functionality.
+struct RuntimeToIR
+{
+ std::unordered_map<const Tensor *, const luci::CircleNode *> tensor_to_node;
+ std::unordered_map<const Kernel *, const luci::CircleNode *> kernel_to_node;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Add.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Add.cpp
new file mode 100644
index 000000000..501e84752
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Add.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Add.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleAdd(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleAdd *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ AddParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Add>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp
new file mode 100644
index 000000000..f3ca55744
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ArgMax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleArgMax(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleArgMax *>(circle_node);
+ assert(node->arity() == 2);
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *axis = helper.getInputTensor(node->dimension());
+ Tensor *output = helper.getOutputTensor(node);
+
+ ArgMaxParams params{};
+ params.output_type = node->output_type();
+
+ return std::make_unique<kernels::ArgMax>(input, axis, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
new file mode 100644
index 000000000..a8135706f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/AveragePool2D.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleAveragePool2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleAveragePool2D *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->value());
+ Tensor *output = helper.getOutputTensor(node);
+
+ Pool2DParams params{};
+ params.padding = node->padding();
+ params.filter_height = node->filter()->h();
+ params.filter_width = node->filter()->w();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.activation = node->fusedActivationFunction();
+
+ // It is unknown what data will be stored in scratchpad tensor,
+ // using UINT8 as a most general option
+ auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
+ scratchpad->set_observable(false);
+ scratchpad->set_data_buffer(nullptr);
+ // If node has execution plan then read memory offsets for scratchpad temporary tensor
+ // from the beginning of shared memory buffer.
+ // Used in Static Memory Manager.
+ // TODO move tensors offset initialization to one place
+ if (luci::has_execution_plan(node))
+ {
+ const auto execution_plan = luci::get_execution_plan(node);
+    // Check whether the offset for the current CircleAveragePool2D temporary was found.
+ if (execution_plan.offsets().size() > 1)
+ // If this is true, then we keep this offset in scratchpad.
+ scratchpad->set_offset(execution_plan.offsets().at(1));
+ }
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
+
+ return std::make_unique<kernels::AveragePool2D>(input, output, tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp
new file mode 100644
index 000000000..9da2f6d93
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/BatchMatMul.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleBatchMatMul(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleBatchMatMul *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *lhs = helper.getInputTensor(node->x());
+ const Tensor *rhs = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ auto lhs_scratchpad =
+ std::make_unique<Tensor>(lhs->element_type(), Shape({}), AffineQuantization{}, "");
+ lhs_scratchpad->set_observable(false);
+ lhs_scratchpad->set_data_buffer(nullptr);
+ auto rhs_scratchpad =
+ std::make_unique<Tensor>(rhs->element_type(), Shape({}), AffineQuantization{}, "");
+ rhs_scratchpad->set_observable(false);
+ rhs_scratchpad->set_data_buffer(nullptr);
+ // If node has execution plan then read memory offsets for scratchpad temporary tensor
+ // from the beginning of shared memory buffer.
+ // Used in Static Memory Manager.
+ // TODO move tensors offset initialization to one place
+ if (luci::has_execution_plan(node))
+ {
+ const auto execution_plan = luci::get_execution_plan(node);
+ // Check whether the offset for the current BatchMatMul temporary was found.
+ if (execution_plan.offsets().size() > 1)
+ {
+ assert(execution_plan.offsets().size() == 3);
+
+ // If this is true, then we keep this offset in scratchpad.
+ lhs_scratchpad->set_offset(execution_plan.offsets().at(1));
+ rhs_scratchpad->set_offset(execution_plan.offsets().at(2));
+ }
+ }
+ Tensor *lhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(lhs_scratchpad));
+ Tensor *rhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(rhs_scratchpad));
+
+ BatchMatMulParams params;
+ params.adj_x = node->adj_x();
+ params.adj_y = node->adj_y();
+
+ return std::make_unique<kernels::BatchMatMul>(lhs, rhs, output, lhs_tmp, rhs_tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp
new file mode 100644
index 000000000..ac6ebb30f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/BatchToSpaceND.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleBatchToSpaceND(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleBatchToSpaceND *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *block_shape = helper.getInputTensor(node->block_shape());
+ const Tensor *crops = helper.getInputTensor(node->crops());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::BatchToSpaceND>(input, block_shape, crops, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Builders.h b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Builders.h
new file mode 100644
index 000000000..eab284008
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Builders.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H
+#define LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H
+
+#include "loader/KernelBuilderHelper.h"
+
+#include "luci/IR/CircleNodes.h"
+
+namespace luci_interpreter
+{
+
+#define REGISTER_KERNEL(name) \
+ std::unique_ptr<Kernel> build_kernel_Circle##name(const luci::CircleNode *circle_node, \
+ KernelBuilderHelper &helper);
+
+#include "KernelsToBuild.lst"
+
+#undef REGISTER_KERNEL
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Cast.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Cast.cpp
new file mode 100644
index 000000000..a16354c96
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Cast.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Cast.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleCast(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleCast *>(circle_node);
+
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Cast>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp
new file mode 100644
index 000000000..ba2564ea2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Concatenation.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleConcatenation(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleConcatenation *>(circle_node);
+ std::vector<const Tensor *> inputs(node->numValues());
+ for (uint32_t i = 0; i < node->numValues(); ++i)
+ {
+ inputs[i] = helper.getInputTensor(node->values(i));
+ }
+ Tensor *output = helper.getOutputTensor(node);
+
+ ConcatenationParams params{};
+ params.axis = node->axis();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Concatenation>(std::move(inputs), output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp
new file mode 100644
index 000000000..218165e20
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Conv2D.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleConv2D *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *filter = helper.getInputTensor(node->filter());
+ const Tensor *bias = helper.getOptionalInputTensor(node->bias());
+ Tensor *output = helper.getOutputTensor(node);
+
+ // It is unknown what data will be stored in scratchpad tensor,
+ // using UINT8 as a most general option
+ auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
+ scratchpad->set_observable(false);
+ scratchpad->set_data_buffer(nullptr);
+ // If node has execution plan then read memory offsets for scratchpad temporary tensor
+ // from the beginning of shared memory buffer.
+ // Used in Static Memory Manager.
+ // TODO move tensors offset initialization to one place
+ if (luci::has_execution_plan(node))
+ {
+ const auto execution_plan = luci::get_execution_plan(node);
+ // Check whether the offset for the current CircleConv2D temporary was found.
+ if (execution_plan.offsets().size() > 1)
+ // If this is true, then we keep this offset in scratchpad.
+ scratchpad->set_offset(execution_plan.offsets().at(1));
+ }
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
+
+ Conv2DParams params{};
+ params.padding = node->padding();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.dilation_height_factor = node->dilation()->h();
+ params.dilation_width_factor = node->dilation()->w();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Conv2D>(input, filter, bias, output, tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp
new file mode 100644
index 000000000..174946367
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/DepthToSpace.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDepthToSpace(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleDepthToSpace *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ DepthToSpaceParams params{};
+ params.block_size = node->block_size();
+
+ return std::make_unique<kernels::DepthToSpace>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
new file mode 100644
index 000000000..8af1e3b58
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/DepthwiseConv2D.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDepthwiseConv2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleDepthwiseConv2D *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *filter = helper.getInputTensor(node->filter());
+ const Tensor *bias = helper.getInputTensor(node->bias());
+ Tensor *output = helper.getOutputTensor(node);
+
+ DepthwiseConv2DParams params{};
+ params.padding = node->padding();
+ params.depth_multiplier = node->depthMultiplier();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.dilation_height_factor = node->dilation()->h();
+ params.dilation_width_factor = node->dilation()->w();
+ params.activation = node->fusedActivationFunction();
+
+ // It is unknown what data will be stored in scratchpad tensor,
+ // using UINT8 as a most general option
+ auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
+ scratchpad->set_observable(false);
+ scratchpad->set_data_buffer(nullptr);
+ // If node has execution plan then read memory offsets for scratchpad temporary tensor
+ // from the beginning of shared memory buffer.
+ // Used in Static Memory Manager.
+ // TODO move tensors offset initialization to one place
+ if (luci::has_execution_plan(node))
+ {
+ const auto execution_plan = luci::get_execution_plan(node);
+ // Check whether the offset for the current CircleDepthwiseConv2D temporary was found.
+ if (execution_plan.offsets().size() > 1)
+ // If this is true, then we keep this offset in scratchpad.
+ scratchpad->set_offset(execution_plan.offsets().at(1));
+ }
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
+
+ return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp
new file mode 100644
index 000000000..787322e9b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Dequantize.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDequantize(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleDequantize *>(circle_node);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Dequantize>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Div.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Div.cpp
new file mode 100644
index 000000000..0611dfdab
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Div.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Div.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDiv(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleDiv *>(circle_node);
+ assert(node->arity() == 2);
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ DivParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Div>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Elu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Elu.cpp
new file mode 100644
index 000000000..a79985e3b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Elu.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Elu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleElu(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleElu *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->features());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Elu>(input, output);
+}
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Equal.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Equal.cpp
new file mode 100644
index 000000000..59692883f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Equal.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Equal.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleEqual(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+
+{
+ const auto *node = loco::must_cast<const luci::CircleEqual *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Equal>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Exp.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Exp.cpp
new file mode 100644
index 000000000..30d11cb89
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Exp.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Exp.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleExp(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleExp *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Exp>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp
new file mode 100644
index 000000000..9840c34e5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ExpandDims.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleExpandDims(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleExpandDims *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *axis = helper.getInputTensor(node->axis());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::ExpandDims>(input, axis, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Fill.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Fill.cpp
new file mode 100644
index 000000000..3aefdf1c5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Fill.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Fill.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFill(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleFill *>(circle_node);
+ assert(node->arity() == 2);
+
+ const auto dims = helper.getInputTensor(node->dims());
+ const auto value = helper.getInputTensor(node->value());
+ auto output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Fill>(dims, value, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Floor.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Floor.cpp
new file mode 100644
index 000000000..e0a223116
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Floor.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Floor.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFloor(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleFloor *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Floor>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp
new file mode 100644
index 000000000..a45d89e38
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/FloorDiv.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFloorDiv(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleFloorDiv *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::FloorDiv>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp
new file mode 100644
index 000000000..b7b742b8a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/FullyConnected.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFullyConnected(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleFullyConnected *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *weights = helper.getInputTensor(node->weights());
+ const Tensor *bias = helper.getOptionalInputTensor(node->bias());
+ Tensor *output = helper.getOutputTensor(node);
+
+ FullyConnectedParams params{};
+ params.activation = node->fusedActivationFunction();
+ params.keep_num_dims = node->keep_num_dims();
+
+ return std::make_unique<kernels::FullyConnected>(input, weights, bias, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Gather.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Gather.cpp
new file mode 100644
index 000000000..2ee2906e0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Gather.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Gather.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleGather(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleGather *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *params = helper.getInputTensor(node->params());
+ const Tensor *indices = helper.getInputTensor(node->indices());
+ Tensor *output = helper.getOutputTensor(node);
+
+ GatherParams gparams{};
+ gparams.axis = node->axis();
+ // TODO support batch_dims
+ gparams.batch_dims = 0;
+
+ return std::make_unique<kernels::Gather>(params, indices, output, gparams);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Greater.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Greater.cpp
new file mode 100644
index 000000000..80aa63cf0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Greater.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Greater.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleGreater(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleGreater *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Greater>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp
new file mode 100644
index 000000000..272f2843b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/GreaterEqual.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleGreaterEqual(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleGreaterEqual *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::GreaterEqual>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/If.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/If.cpp
new file mode 100644
index 000000000..3ac7d4941
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/If.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/If.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleIf(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleIf *>(circle_node);
+ auto output_nodes = collectOutputNodes<luci::CircleIfOut>(node);
+ assert(node->arity() == 1 + node->input_count());
+ assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
+
+ const Tensor *cond = helper.getInputTensor(node->cond());
+ std::vector<const Tensor *> inputs(node->input_count());
+ for (uint32_t i = 0; i < node->input_count(); ++i)
+ {
+ inputs[i] = helper.getInputTensor(node->input(i));
+ }
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ RuntimeGraph *then_graph = helper.getRuntimeGraph(node->then_graph());
+ RuntimeGraph *else_graph = helper.getRuntimeGraph(node->else_graph());
+
+ return std::make_unique<kernels::If>(cond, std::move(inputs), std::move(outputs), then_graph,
+ else_graph);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp
new file mode 100644
index 000000000..06031e5bc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/InstanceNorm.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleInstanceNorm(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleInstanceNorm *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *gamma = helper.getInputTensor(node->gamma());
+ const Tensor *beta = helper.getInputTensor(node->beta());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ InstanceNormParams params{};
+ params.epsilon = node->epsilon();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::InstanceNorm>(input, gamma, beta, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp
new file mode 100644
index 000000000..6e22e6d4e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/L2Normalize.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleL2Normalize(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleL2Normalize *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ L2NormParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::L2Normalize>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp
new file mode 100644
index 000000000..95b55896f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/L2Pool2D.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleL2Pool2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleL2Pool2D *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->value());
+ Tensor *output = helper.getOutputTensor(node);
+
+ Pool2DParams params{};
+ params.padding = node->padding();
+ params.filter_height = node->filter()->h();
+ params.filter_width = node->filter()->w();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::L2Pool2D>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp
new file mode 100644
index 000000000..bbf5067b1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LeakyRelu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLeakyRelu(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLeakyRelu *>(circle_node);
+ assert(node->arity() == 1);
+ const Tensor *input = helper.getInputTensor(node->features());
+ Tensor *output = helper.getOutputTensor(node);
+
+ LeakyReluParams params{};
+ params.alpha = node->alpha();
+
+ return std::make_unique<kernels::LeakyRelu>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Less.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Less.cpp
new file mode 100644
index 000000000..ae914ecc9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Less.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Less.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLess(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLess *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Less>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp
new file mode 100644
index 000000000..f1b424b55
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LessEqual.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLessEqual(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLessEqual *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LessEqual>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp
new file mode 100644
index 000000000..962ca2d7c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LocalResponseNormalization.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel>
+build_kernel_CircleLocalResponseNormalization(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLocalResponseNormalization *>(circle_node);
+ assert(node->arity() == 1);
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = node->radius();
+ params.bias = node->bias();
+ params.alpha = node->alpha();
+ params.beta = node->beta();
+
+ return std::make_unique<kernels::LocalResponseNormalization>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp
new file mode 100644
index 000000000..432204115
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogSoftmax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogSoftmax(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLogSoftmax *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->logits());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LogSoftmax>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp
new file mode 100644
index 000000000..bf3cb671a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalAnd.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogicalAnd(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLogicalAnd *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LogicalAnd>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp
new file mode 100644
index 000000000..fefcd9a06
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalNot.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogicalNot(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLogicalNot *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LogicalNot>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp
new file mode 100644
index 000000000..a416cb401
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalOr.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogicalOr(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLogicalOr *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LogicalOr>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Logistic.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Logistic.cpp
new file mode 100644
index 000000000..4a69deef1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Logistic.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Logistic.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogistic(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLogistic *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Logistic>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp
new file mode 100644
index 000000000..f66a206ca
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/MaxPool2D.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMaxPool2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMaxPool2D *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->value());
+ Tensor *output = helper.getOutputTensor(node);
+
+ Pool2DParams params{};
+ params.padding = node->padding();
+ params.filter_height = node->filter()->h();
+ params.filter_width = node->filter()->w();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::MaxPool2D>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Maximum.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Maximum.cpp
new file mode 100644
index 000000000..d0bff776a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Maximum.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Maximum.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMaximum(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMaximum *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Maximum>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mean.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mean.cpp
new file mode 100644
index 000000000..0dec63e79
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mean.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Mean.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMean(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMean *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *axes = helper.getInputTensor(node->reduction_indices());
+ Tensor *output = helper.getOutputTensor(node);
+
+ auto temp_index_unique =
+ std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+ temp_index_unique->set_observable(false);
+ temp_index_unique->set_data_buffer(nullptr);
+ Tensor *temp_index =
+ helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_index_unique));
+
+ auto resolved_axes_unique =
+ std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+ resolved_axes_unique->set_observable(false);
+ resolved_axes_unique->set_data_buffer(nullptr);
+ Tensor *resolved_axes =
+ helper.getRuntimeGraph(node->graph())->addTensor(std::move(resolved_axes_unique));
+
+ auto temp_sum_unique =
+ std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, "");
+ temp_sum_unique->set_observable(false);
+ temp_sum_unique->set_data_buffer(nullptr);
+ Tensor *temp_sum = helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_sum_unique));
+
+ ReducerParams params{};
+ params.keep_dims = node->keep_dims();
+
+ return std::make_unique<kernels::Mean>(input, axes, output, temp_index, resolved_axes, temp_sum,
+ params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Minimum.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Minimum.cpp
new file mode 100644
index 000000000..1a49c1090
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Minimum.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Minimum.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMinimum(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMinimum *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Minimum>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp
new file mode 100644
index 000000000..b221b4574
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/MirrorPad.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMirrorPad(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMirrorPad *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *paddings = helper.getInputTensor(node->paddings());
+ Tensor *output = helper.getOutputTensor(node);
+
+ MirrorPadParams params{};
+ params.mode = node->mode();
+
+ return std::make_unique<kernels::MirrorPad>(input, paddings, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mul.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mul.cpp
new file mode 100644
index 000000000..f9984853a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mul.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Mul.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMul(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMul *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ MulParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Mul>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Neg.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Neg.cpp
new file mode 100644
index 000000000..9a9ecf991
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Neg.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Neg.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleNeg(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleNeg *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Neg>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp
new file mode 100644
index 000000000..3916a5854
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/NotEqual.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleNotEqual(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleNotEqual *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::NotEqual>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/OneHot.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/OneHot.cpp
new file mode 100644
index 000000000..a40160945
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/OneHot.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/OneHot.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleOneHot(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleOneHot *>(circle_node);
+ assert(node->arity() == 4);
+
+ const Tensor *indices = helper.getInputTensor(node->indices());
+ const Tensor *depth = helper.getInputTensor(node->depth());
+ const Tensor *on_value = helper.getInputTensor(node->on_value());
+ const Tensor *off_value = helper.getInputTensor(node->off_value());
+ Tensor *output = helper.getOutputTensor(node);
+
+ OneHotParams params{};
+ params.axis = node->axis();
+
+ return std::make_unique<kernels::OneHot>(indices, depth, on_value, off_value, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/PRelu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/PRelu.cpp
new file mode 100644
index 000000000..f3d700c95
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/PRelu.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/PRelu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePRelu(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CirclePRelu *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *alpha = helper.getInputTensor(node->alpha());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::PRelu>(input, alpha, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pack.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pack.cpp
new file mode 100644
index 000000000..efc5850e0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pack.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pack.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePack(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CirclePack *>(circle_node);
+ assert(node->arity() == node->values_count());
+
+ std::vector<const Tensor *> inputs(node->values_count());
+ for (uint32_t i = 0; i < node->values_count(); ++i)
+ {
+ inputs[i] = helper.getInputTensor(node->values(i));
+ }
+ Tensor *output = helper.getOutputTensor(node);
+
+ PackParams params{};
+ params.axis = node->axis();
+ params.values_count = node->values_count();
+
+ return std::make_unique<kernels::Pack>(std::move(inputs), output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pad.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pad.cpp
new file mode 100644
index 000000000..67ce997a7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pad.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pad.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePad(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CirclePad *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *paddings = helper.getInputTensor(node->paddings());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Pad>(input, paddings, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/PadV2.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/PadV2.cpp
new file mode 100644
index 000000000..e378a972a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/PadV2.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/PadV2.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePadV2(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CirclePadV2 *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *paddings = helper.getInputTensor(node->paddings());
+ const Tensor *constant_values = helper.getInputTensor(node->constant_values());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::PadV2>(input, paddings, constant_values, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pow.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pow.cpp
new file mode 100644
index 000000000..d32fc3dbb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pow.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pow.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePow(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CirclePow *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Pow>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Quantize.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Quantize.cpp
new file mode 100644
index 000000000..cb36fb6da
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Quantize.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Quantize.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleQuantize(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleQuantize *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Quantize>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu.cpp
new file mode 100644
index 000000000..1d64c1c4e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Relu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleRelu(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleRelu *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->features());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Relu>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu6.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu6.cpp
new file mode 100644
index 000000000..e50cd2545
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu6.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Relu6.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleRelu6(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleRelu6 *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->features());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Relu6>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Reshape.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Reshape.cpp
new file mode 100644
index 000000000..76ddd88a3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Reshape.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Reshape.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleReshape(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleReshape *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->tensor());
+ const Tensor *shape = helper.getInputTensor(node->shape());
+ Tensor *output = helper.getOutputTensor(node);
+
+ // NOTE 'newShape' attribute is ignored.
+ return std::make_unique<kernels::Reshape>(input, shape, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp
new file mode 100644
index 000000000..dc2b88ad3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ResizeBilinear.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleResizeBilinear(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleResizeBilinear *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *size = helper.getInputTensor(node->size());
+ Tensor *output = helper.getOutputTensor(node);
+
+ ResizeBilinearParams params{};
+ params.align_corners = node->align_corners();
+ params.half_pixel_centers = node->half_pixel_centers();
+
+ return std::make_unique<kernels::ResizeBilinear>(input, size, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp
new file mode 100644
index 000000000..c7058ae78
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ResizeNearestNeighbor.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel>
+build_kernel_CircleResizeNearestNeighbor(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleResizeNearestNeighbor *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *size = helper.getInputTensor(node->size());
+ Tensor *output = helper.getOutputTensor(node);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = node->align_corners();
+  // TODO Update half_pixel_centers once CircleResizeNearestNeighbor supports it.
+  // The current CircleResizeNearestNeighbor does not have a half_pixel_centers
+  // attribute, so its value defaults to false here. This needs to be revisited
+  // when CircleResizeNearestNeighbor is updated.
+ params.half_pixel_centers = false;
+
+ return std::make_unique<kernels::ResizeNearestNeighbor>(input, size, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp
new file mode 100644
index 000000000..c1a7f5350
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ReverseV2.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleReverseV2(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleReverseV2 *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->tensor());
+ const Tensor *axes = helper.getInputTensor(node->axis());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::ReverseV2>(input, axes, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp
new file mode 100644
index 000000000..0714a5dba
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Rsqrt.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleRsqrt(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleRsqrt *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Rsqrt>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SVDF.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SVDF.cpp
new file mode 100644
index 000000000..d172ef438
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SVDF.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SVDF.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSVDF(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSVDF *>(circle_node);
+ assert(node->arity() == 5);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *feature = helper.getInputTensor(node->weight_feature());
+ const Tensor *time = helper.getInputTensor(node->weight_time());
+ const Tensor *bias = helper.getOptionalInputTensor(node->bias());
+ const Tensor *input_activation_state = helper.getInputTensor(node->input_activation_state());
+ Tensor *output = helper.getOutputTensor(node);
+
+ auto scratchpad_tensor = std::make_unique<Tensor>(input_activation_state->element_type(),
+ Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ DataType data_type = input->element_type() == DataType::S8 ? DataType::S32 : DataType::FLOAT32;
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_1 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ if (data_type == DataType::FLOAT32 &&
+ (feature->element_type() == DataType::S8 || feature->element_type() == DataType::U8))
+ {
+ data_type = feature->element_type();
+ }
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_2 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ data_type = DataType::FLOAT32;
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_3 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_4 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_5 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_6 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ SVDFParams params{};
+ params.activation = node->fusedActivationFunction();
+ params.svdf_rank = node->svdf_rank();
+ params.asymmetric_quantize_inputs = node->asymmetric_quantize_inputs();
+
+ return std::make_unique<kernels::SVDF>(input, feature, time, bias, input_activation_state, output,
+ tmp, tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Shape.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Shape.cpp
new file mode 100644
index 000000000..d1edbc794
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Shape.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Shape.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleShape(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleShape *>(circle_node);
+ assert(node->arity() == 1);
+
+ const auto input = helper.getInputTensor(node->input());
+ auto output = helper.getOutputTensor(node);
+
+ ShapeParams shape_params{};
+ shape_params.out_type = node->out_type();
+
+ return std::make_unique<kernels::ShapeKernel>(input, output, shape_params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Slice.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Slice.cpp
new file mode 100644
index 000000000..60ac6417c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Slice.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Slice.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSlice(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSlice *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *begin = helper.getInputTensor(node->begin());
+ const Tensor *size = helper.getInputTensor(node->size());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Slice>(input, begin, size, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Softmax.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Softmax.cpp
new file mode 100644
index 000000000..f41f63f6f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Softmax.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Softmax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSoftmax(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSoftmax *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->logits());
+ Tensor *output = helper.getOutputTensor(node);
+
+ SoftmaxParams params{};
+ params.beta = node->beta();
+
+ return std::make_unique<kernels::Softmax>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp
new file mode 100644
index 000000000..b6e6cf516
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SpaceToBatchND.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSpaceToBatchND(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSpaceToBatchND *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *block_shape = helper.getInputTensor(node->block_shape());
+ const Tensor *paddings = helper.getInputTensor(node->paddings());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::SpaceToBatchND>(input, block_shape, paddings, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp
new file mode 100644
index 000000000..63fdb95ec
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SpaceToDepth.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSpaceToDepth(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSpaceToDepth *>(circle_node);
+ assert(node->arity() == 1);
+ const Tensor *input = helper.getInputTensor(node->input());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ SpaceToDepthParams params{};
+ params.block_size = node->block_size();
+
+ return std::make_unique<kernels::SpaceToDepth>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Split.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Split.cpp
new file mode 100644
index 000000000..3f6d4a7df
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Split.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Split.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSplit(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSplit *>(circle_node);
+ auto output_nodes = collectOutputNodes<luci::CircleSplitOut>(node);
+ assert(node->arity() == 2);
+ assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
+
+ const Tensor *axis = helper.getInputTensor(node->split_dim());
+ const Tensor *input = helper.getInputTensor(node->input());
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ // NOTE 'num_splits' attribute is ignored.
+ return std::make_unique<kernels::Split>(axis, input, std::move(outputs));
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SplitV.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SplitV.cpp
new file mode 100644
index 000000000..0788822ca
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SplitV.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SplitV.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSplitV(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSplitV *>(circle_node);
+ auto output_nodes = collectOutputNodes<luci::CircleSplitVOut>(node);
+ assert(node->arity() == 3);
+ assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *sizes_data = helper.getInputTensor(node->size_splits());
+ const Tensor *axis = helper.getInputTensor(node->split_dim());
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ // NOTE 'num_splits' attribute is ignored.
+ return std::make_unique<kernels::SplitV>(input, sizes_data, axis, std::move(outputs));
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp
new file mode 100644
index 000000000..b9843fe0b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Sqrt.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSqrt(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSqrt *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Sqrt>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Square.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Square.cpp
new file mode 100644
index 000000000..0ad7c1772
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Square.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Square.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSquare(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSquare *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Square>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp
new file mode 100644
index 000000000..e4c6fd851
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SquaredDifference.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSquaredDifference(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSquaredDifference *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::SquaredDifference>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp
new file mode 100644
index 000000000..6885f8077
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Squeeze.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSqueeze(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSqueeze *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ SqueezeParams params{};
+ params.squeeze_dims = node->squeeze_dims();
+
+ return std::make_unique<kernels::Squeeze>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp
new file mode 100644
index 000000000..359b4e3e9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/StridedSlice.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleStridedSlice(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleStridedSlice *>(circle_node);
+ assert(node->arity() == 4);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *begin = helper.getInputTensor(node->begin());
+ const Tensor *end = helper.getInputTensor(node->end());
+ const Tensor *strides = helper.getInputTensor(node->strides());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ StridedSliceParams params{};
+ params.begin_mask = node->begin_mask();
+ params.ellipsis_mask = node->ellipsis_mask();
+ params.end_mask = node->end_mask();
+ params.new_axis_mask = node->new_axis_mask();
+ params.shrink_axis_mask = node->shrink_axis_mask();
+
+ return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sub.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sub.cpp
new file mode 100644
index 000000000..a6252cb53
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sub.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Sub.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSub(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSub *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ SubParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Sub>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Tanh.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Tanh.cpp
new file mode 100644
index 000000000..a58ef60a8
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Tanh.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Tanh.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleTanh(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleTanh *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Tanh>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Transpose.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Transpose.cpp
new file mode 100644
index 000000000..ea17d8311
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Transpose.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Transpose.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleTranspose(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleTranspose *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->a());
+ const Tensor *perm = helper.getInputTensor(node->perm());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Transpose>(input, perm, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp
new file mode 100644
index 000000000..d773e301e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/TransposeConv.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleTransposeConv(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleTransposeConv *>(circle_node);
+ assert(node->arity() == 4);
+
+ const Tensor *input_sizes = helper.getInputTensor(node->inputSizes());
+ const Tensor *filter = helper.getInputTensor(node->filter());
+ const Tensor *out_backprop = helper.getInputTensor(node->outBackprop());
+ const Tensor *bias = helper.getOptionalInputTensor(node->bias());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ DataType scratch_data_type =
+ helper.getInputTensor(node)->element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+
+ auto scratch_tensor =
+ std::make_unique<Tensor>(scratch_data_type, Shape({}), AffineQuantization{}, "");
+ scratch_tensor->set_observable(false);
+ scratch_tensor->set_data_buffer(nullptr);
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratch_tensor));
+
+ TransposeConvParams params{};
+ params.padding = node->padding();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+
+ return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, bias, output,
+ tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Unpack.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Unpack.cpp
new file mode 100644
index 000000000..a1c0d323a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Unpack.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Unpack.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleUnpack(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleUnpack *>(circle_node);
+ auto output_nodes = collectOutputNodes<luci::CircleUnpackOut>(node);
+ assert(node->arity() == 1);
+ assert(output_nodes.size() == static_cast<size_t>(node->num()));
+
+ const Tensor *input = helper.getInputTensor(node->value());
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ UnpackParams params{};
+ params.axis = node->axis();
+
+ // NOTE 'num' attribute is ignored.
+ return std::make_unique<kernels::Unpack>(input, std::move(outputs), params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/While.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/While.cpp
new file mode 100644
index 000000000..8fde6ec8a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/While.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/While.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleWhile(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleWhile *>(circle_node);
+
+ auto output_nodes = collectOutputNodes<luci::CircleWhileOut>(node);
+ assert(node->arity() == node->input_count());
+ assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
+
+ std::vector<const Tensor *> inputs(node->input_count());
+ for (uint32_t i = 0; i < node->input_count(); ++i)
+ {
+ inputs[i] = helper.getInputTensor(node->input(i));
+ }
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ RuntimeGraph *cond_graph = helper.getRuntimeGraph(node->cond_graph());
+ RuntimeGraph *body_graph = helper.getRuntimeGraph(node->body_graph());
+
+ return std::make_unique<kernels::While>(std::move(inputs), std::move(outputs), cond_graph,
+ body_graph);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/standalone/CMakeLists.txt b/compiler/luci-micro/standalone/CMakeLists.txt
index 7953359ad..d3048264d 100644
--- a/compiler/luci-micro/standalone/CMakeLists.txt
+++ b/compiler/luci-micro/standalone/CMakeLists.txt
@@ -7,6 +7,9 @@ set(BUILD_WHITELIST "dummy")
add_subdirectory(${NNAS_ROOT}/infra/nncc ${CMAKE_CURRENT_BINARY_DIR}/nncc)
set(ONE_COMPILER_SRC_DIR "${NNAS_PROJECT_SOURCE_DIR}/compiler")
+nnas_find_package(FlatBuffersSource EXACT 2.0 QUIET)
+
+include_directories(${FlatBuffersSource_DIR}/include)
add_subdirectory(${ONE_COMPILER_SRC_DIR}/loco ${CMAKE_CURRENT_BINARY_DIR}/loco)
add_subdirectory(${ONE_COMPILER_SRC_DIR}/angkor ${CMAKE_CURRENT_BINARY_DIR}/angkor)
@@ -14,7 +17,21 @@ add_subdirectory(${ONE_COMPILER_SRC_DIR}/oops ${CMAKE_CURRENT_BINARY_DIR}/oops)
add_subdirectory(${ONE_COMPILER_SRC_DIR}/pepper-str ${CMAKE_CURRENT_BINARY_DIR}/pepper-str)
add_subdirectory(${ONE_COMPILER_SRC_DIR}/logo ${CMAKE_CURRENT_BINARY_DIR}/logo)
add_subdirectory(${ONE_COMPILER_SRC_DIR}/logo-core ${CMAKE_CURRENT_BINARY_DIR}/logo-core)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/hermes-std ${CMAKE_CURRENT_BINARY_DIR}/hermes-std)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/hermes ${CMAKE_CURRENT_BINARY_DIR}/hermes)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/pepper-strcast ${CMAKE_CURRENT_BINARY_DIR}/pepper-strcast)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/foder ${CMAKE_CURRENT_BINARY_DIR}/foder)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/mio-circle04 ${CMAKE_CURRENT_BINARY_DIR}/mio-circle04)
+
add_subdirectory(${ONE_COMPILER_SRC_DIR}/locomotiv ${CMAKE_CURRENT_BINARY_DIR}/locomotiv)
add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/lang ${CMAKE_CURRENT_BINARY_DIR}/luci/lang)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/import ${CMAKE_CURRENT_BINARY_DIR}/luci/import)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/profile ${CMAKE_CURRENT_BINARY_DIR}/luci/profile)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/env ${CMAKE_CURRENT_BINARY_DIR}/luci/env)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/plan ${CMAKE_CURRENT_BINARY_DIR}/luci/plan)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/log ${CMAKE_CURRENT_BINARY_DIR}/luci/log)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/logex ${CMAKE_CURRENT_BINARY_DIR}/luci/logex)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/locop ${CMAKE_CURRENT_BINARY_DIR}/locop)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/pp ${CMAKE_CURRENT_BINARY_DIR}/pp)
-add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci-interpreter ${CMAKE_CURRENT_BINARY_DIR}/luci-interpreter)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci-micro/luci-interpreter ${CMAKE_CURRENT_BINARY_DIR}/luci-interpreter)
diff --git a/compiler/luci-pass-value-test/CMakeLists.txt b/compiler/luci-pass-value-test/CMakeLists.txt
index 034fe5269..3489f1eac 100644
--- a/compiler/luci-pass-value-test/CMakeLists.txt
+++ b/compiler/luci-pass-value-test/CMakeLists.txt
@@ -17,6 +17,13 @@ macro(addeval RECIPE PASS_OPTION)
set(PASS_CIRCLE_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${PASS_CIRCLE_FILE}")
set(DASH_PASS_OPTION "--${PASS_OPTION}")
+ foreach(MORE_OPTIONS ${ARGN})
+ list(APPEND DASH_PASS_OPTION "--${MORE_OPTIONS}")
+ endforeach()
+ # NOTE if there are two options, 'DASH_PASS_OPTION' will be like '--option_a;--option_b'
+ # add_custom_command() will translate ';' to two arguments as '--optiona_a --optionb'
+ # do not use set(DASH_PASS_OPTION "${DASH_PASS_OPTION} --${ARG}"))
+ # as this will become like '"--optiona_a --optionb"' which is one string argument
# Generate optimized .circle
add_custom_command(OUTPUT ${PASS_CIRCLE_OUTPUT_PATH}
diff --git a/compiler/luci-pass-value-test/test.lst b/compiler/luci-pass-value-test/test.lst
index 67476c644..cdff159e0 100644
--- a/compiler/luci-pass-value-test/test.lst
+++ b/compiler/luci-pass-value-test/test.lst
@@ -14,6 +14,8 @@ addeval(Net_Conv_Add_Mul_002 fuse_batchnorm_with_conv)
addeval(Net_Conv_Min_Max_000 transform_min_max_to_relu6)
addeval(Net_Conv_Min_Relu_000 transform_min_relu_to_relu6)
addeval(Net_Conv_Relu6_000 fuse_activation_function)
+addeval(Net_Densify_Add_000 fold_densify)
+addeval(Net_Dequantize_Add_000 fold_dequantize)
addeval(Net_DwConv_BN_000 fuse_batchnorm_with_dwconv)
addeval(Net_DwConv_BN_001 fuse_batchnorm_with_dwconv)
addeval(Net_Reshape_Neg_000 forward_reshape_to_unaryop)
@@ -25,10 +27,17 @@ addeval(Net_TConv_Add_002 fuse_add_with_tconv)
addeval(Net_TConv_BN_000 fuse_batchnorm_with_tconv)
addeval(Net_TConv_BN_001 fuse_batchnorm_with_tconv)
addeval(Net_TConv_BN_002 fuse_batchnorm_with_tconv)
+addeval(Net_TConv_BN_003 fuse_batchnorm_with_tconv)
+addeval(Net_TConv_BN_004 fuse_batchnorm_with_tconv)
addeval(Net_InstanceNorm_001 fuse_instnorm)
addeval(Net_InstanceNorm_002 fuse_instnorm)
addeval(Net_InstanceNorm_003 fuse_instnorm)
addeval(Net_StridedSlice_StridedSlice_000 remove_unnecessary_strided_slice)
+addeval(FullyConnected_007 replace_non_const_fc_with_batch_matmul)
+
+# test for limited support for FLOAT16
+addeval(Net_Dequantize_Add_000 fold_dequantize)
+addeval(Net_Densify_Dequantize_Add_000 fold_dequantize fold_densify)
# test SignatureDef, with any optimization
#addeval(SignatureDef_MultiOut_000 fuse_instnorm)
diff --git a/compiler/luci-value-test/test.lst b/compiler/luci-value-test/test.lst
index f62b72919..932da95c5 100644
--- a/compiler/luci-value-test/test.lst
+++ b/compiler/luci-value-test/test.lst
@@ -161,6 +161,8 @@ addeval(Squeeze_001)
addeval(StridedSlice_000)
addeval(StridedSlice_001)
addeval(StridedSlice_002)
+addeval(StridedSlice_003)
+addeval(StridedSlice_004)
addeval(Sub_000)
addeval(Sub_U8_000)
#addeval(Sum_000)
diff --git a/compiler/luci/export/src/CircleBuiltinTypesExtractor.h b/compiler/luci/export/src/CircleBuiltinTypesExtractor.h
index 0ff21a34b..7516197c0 100644
--- a/compiler/luci/export/src/CircleBuiltinTypesExtractor.h
+++ b/compiler/luci/export/src/CircleBuiltinTypesExtractor.h
@@ -118,6 +118,10 @@ public:
return circle::CreateCosOptions(_builder).Union();
}
flatbuffers::Offset<void> visit(luci::CircleCustom *) { return _no_option; }
+ flatbuffers::Offset<void> visit(luci::CircleDensify *)
+ {
+ return circle::CreateDensifyOptions(_builder).Union();
+ }
flatbuffers::Offset<void> visit(luci::CircleDepthToSpace *node)
{
return circle::CreateDepthToSpaceOptions(_builder, node->block_size()).Union();
diff --git a/compiler/luci/export/src/CircleOps.lst b/compiler/luci/export/src/CircleOps.lst
index 1b6909303..8a75ef706 100644
--- a/compiler/luci/export/src/CircleOps.lst
+++ b/compiler/luci/export/src/CircleOps.lst
@@ -32,6 +32,7 @@ CIRCLE_NODE(CircleConcatenation, BuiltinOperator_CONCATENATION, BuiltinOptions_C
CIRCLE_NODE(CircleConv2D, BuiltinOperator_CONV_2D, BuiltinOptions_Conv2DOptions)
CIRCLE_NODE(CircleCos, BuiltinOperator_COS, BuiltinOptions_CosOptions)
CIRCLE_NODE(CircleCustom, BuiltinOperator_CUSTOM, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleDensify, BuiltinOperator_DENSIFY, BuiltinOptions_DensifyOptions)
CIRCLE_NODE(CircleDepthToSpace, BuiltinOperator_DEPTH_TO_SPACE, BuiltinOptions_DepthToSpaceOptions)
CIRCLE_NODE(CircleDepthwiseConv2D, BuiltinOperator_DEPTHWISE_CONV_2D, BuiltinOptions_DepthwiseConv2DOptions)
CIRCLE_NODE(CircleDequantize, BuiltinOperator_DEQUANTIZE, BuiltinOptions_DequantizeOptions)
diff --git a/compiler/luci/export/src/CircleTensorExporter.cpp b/compiler/luci/export/src/CircleTensorExporter.cpp
index b3bb850cc..97e81076b 100644
--- a/compiler/luci/export/src/CircleTensorExporter.cpp
+++ b/compiler/luci/export/src/CircleTensorExporter.cpp
@@ -434,6 +434,12 @@ flatbuffers::Offset<circle::Buffer> encodeOpBuffer(FlatBufferBuilder &builder, l
break;
}
+ // NOTE loco::DataType::FLOAT16 is added but we do not export this type
+ // as backends currently don't support this type.
+ // currently this is supported only for "Tensor(Float16) - Dequantize"
+ // sequence so that after 'fold_dequantize' option this Tensor is
+ // converted to FLOAT32.
+
INTERNAL_EXN_V("Unsupported datatype", oops::to_uint32(c->dtype()));
}
diff --git a/compiler/luci/import/CMakeLists.txt b/compiler/luci/import/CMakeLists.txt
index 1b2db23ae..bc0a00b34 100644
--- a/compiler/luci/import/CMakeLists.txt
+++ b/compiler/luci/import/CMakeLists.txt
@@ -18,6 +18,7 @@ target_link_libraries(luci_import PRIVATE luci_log)
target_link_libraries(luci_import PRIVATE luci_logex)
target_link_libraries(luci_import PRIVATE nncc_common)
target_link_libraries(luci_import PRIVATE locop)
+target_link_libraries(luci_import PRIVATE foder)
target_link_libraries(luci_import PRIVATE oops)
target_link_libraries(luci_import PRIVATE mio_circle04_helper)
install(TARGETS luci_import DESTINATION lib)
diff --git a/compiler/luci/import/include/luci/Import/Nodes.h b/compiler/luci/import/include/luci/Import/Nodes.h
index 7a5045ede..a4a6d7ce8 100644
--- a/compiler/luci/import/include/luci/Import/Nodes.h
+++ b/compiler/luci/import/include/luci/Import/Nodes.h
@@ -35,6 +35,7 @@
#include "Nodes/CircleConv2D.h"
#include "Nodes/CircleCos.h"
#include "Nodes/CircleCustom.h"
+#include "Nodes/CircleDensify.h"
#include "Nodes/CircleDepthToSpace.h"
#include "Nodes/CircleDepthwiseConv2D.h"
#include "Nodes/CircleDequantize.h"
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleDensify.h b/compiler/luci/import/include/luci/Import/Nodes/CircleDensify.h
new file mode 100644
index 000000000..42bdac1a4
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleDensify.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_DENSIFY_H__
+#define __LUCI_IMPORT_OP_CIRCLE_DENSIFY_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleDensifyGraphBuilder : public GraphBuilder
+{
+public:
+ bool validate(const ValidateArgs &args) const final;
+
+private:
+ CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_DENSIFY_H__
diff --git a/compiler/luci/import/include/luci/ImporterEx.h b/compiler/luci/import/include/luci/ImporterEx.h
new file mode 100644
index 000000000..852d4c848
--- /dev/null
+++ b/compiler/luci/import/include/luci/ImporterEx.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORTER_EX_H__
+#define __LUCI_IMPORTER_EX_H__
+
+#include "luci/IR/Module.h"
+
+#include <memory>
+#include <string>
+
+namespace luci
+{
+
+class ImporterEx final
+{
+public:
+ ImporterEx() = default;
+
+public:
+ std::unique_ptr<Module> importVerifyModule(const std::string &input_path) const;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORTER_EX_H__
diff --git a/compiler/luci/import/src/GraphBuilderRegistry.cpp b/compiler/luci/import/src/GraphBuilderRegistry.cpp
index fe2d830e9..d3b52aadb 100644
--- a/compiler/luci/import/src/GraphBuilderRegistry.cpp
+++ b/compiler/luci/import/src/GraphBuilderRegistry.cpp
@@ -44,6 +44,7 @@ GraphBuilderRegistry::GraphBuilderRegistry()
CIRCLE_NODE(CONCATENATION, CircleConcatenationGraphBuilder); // 2
CIRCLE_NODE(CONV_2D, CircleConv2DGraphBuilder); // 3
CIRCLE_NODE(COS, CircleCosGraphBuilder); // 108
+ CIRCLE_NODE(DENSIFY, CircleDensifyGraphBuilder); // 124
CIRCLE_NODE(DEPTH_TO_SPACE, CircleDepthToSpaceGraphBuilder); // 5
CIRCLE_NODE(DEPTHWISE_CONV_2D, CircleDepthwiseConv2DGraphBuilder); // 4
CIRCLE_NODE(DEQUANTIZE, CircleDequantizeGraphBuilder); // 6
@@ -160,7 +161,6 @@ GraphBuilderRegistry::GraphBuilderRegistry()
// BuiltinOperator_DELEGATE = 51,
// BuiltinOperator_ARG_MAX = 56,
// BuiltinOperator_HARD_SWISH = 117,
- // BuiltinOperator_DENSIFY = 124,
// Register builders for nodes which not handles in builders registered above.
#define CIRCLE_NODE(CLASS) add(std::make_unique<CLASS>())
diff --git a/compiler/luci/import/src/ImporterEx.cpp b/compiler/luci/import/src/ImporterEx.cpp
new file mode 100644
index 000000000..db585fd4d
--- /dev/null
+++ b/compiler/luci/import/src/ImporterEx.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Importer.h"
+#include "luci/ImporterEx.h"
+
+#include <foder/FileLoader.h>
+
+#include <memory>
+#include <iostream>
+
+namespace luci
+{
+
+std::unique_ptr<Module> ImporterEx::importVerifyModule(const std::string &input_path) const
+{
+ foder::FileLoader file_loader{input_path};
+ std::vector<char> model_data;
+
+ try
+ {
+ model_data = file_loader.load();
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cerr << err.what() << std::endl;
+ return nullptr;
+ }
+
+ flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
+ if (!circle::VerifyModelBuffer(verifier))
+ {
+ std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
+ return nullptr;
+ }
+
+ const circle::Model *circle_model = circle::GetModel(model_data.data());
+ if (circle_model == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
+ return nullptr;
+ }
+
+ Importer importer;
+ return importer.importModule(circle_model);
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleConst.cpp b/compiler/luci/import/src/Nodes/CircleConst.cpp
index a4f190dd9..88f2ae3d0 100644
--- a/compiler/luci/import/src/Nodes/CircleConst.cpp
+++ b/compiler/luci/import/src/Nodes/CircleConst.cpp
@@ -166,6 +166,10 @@ CircleNode *CircleConstNodeBuilder::build(TensorIndex tensor_index,
copy_data<loco::DataType::FLOAT32>(buffer, num_elements, const_node);
break;
+ case loco::DataType::FLOAT16:
+ copy_data<loco::DataType::FLOAT16>(buffer, num_elements, const_node);
+ break;
+
case loco::DataType::U8:
copy_data<loco::DataType::U8>(buffer, num_elements, const_node);
break;
diff --git a/compiler/luci/import/src/Nodes/CircleDensify.cpp b/compiler/luci/import/src/Nodes/CircleDensify.cpp
new file mode 100644
index 000000000..0a4b2186f
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleDensify.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleDensify.h"
+
+#include <luci/IR/Nodes/CircleDensify.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleDensifyGraphBuilder::validate(const ValidateArgs &args) const
+{
+ return GraphBuilder::validate(args, 1);
+}
+
+CircleNode *CircleDensifyGraphBuilder::build_node(const circle::OperatorT &,
+ const std::vector<CircleNode *> &inputs,
+ loco::Graph *graph) const
+{
+ auto *node = graph->nodes()->create<CircleDensify>();
+ node->input(inputs.at(0));
+
+ // No options for Densify
+
+ return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.h b/compiler/luci/lang/include/luci/IR/CircleNodes.h
index d89ea03cc..901f1cbca 100644
--- a/compiler/luci/lang/include/luci/IR/CircleNodes.h
+++ b/compiler/luci/lang/include/luci/IR/CircleNodes.h
@@ -32,6 +32,7 @@
#include "Nodes/CircleConv2D.h"
#include "Nodes/CircleCos.h"
#include "Nodes/CircleCustom.h"
+#include "Nodes/CircleDensify.h"
#include "Nodes/CircleDepthToSpace.h"
#include "Nodes/CircleDepthwiseConv2D.h"
#include "Nodes/CircleDequantize.h"
diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.lst b/compiler/luci/lang/include/luci/IR/CircleNodes.lst
index 1472008df..f227a03f5 100644
--- a/compiler/luci/lang/include/luci/IR/CircleNodes.lst
+++ b/compiler/luci/lang/include/luci/IR/CircleNodes.lst
@@ -30,6 +30,7 @@ CIRCLE_NODE(CONCATENATION, CircleConcatenation)
CIRCLE_NODE(CONV_2D, CircleConv2D)
CIRCLE_NODE(COS, CircleCos)
CIRCLE_NODE(CUSTOM, CircleCustom)
+CIRCLE_NODE(DENSIFY, CircleDensify)
CIRCLE_NODE(DEPTH_TO_SPACE, CircleDepthToSpace)
CIRCLE_NODE(DEPTHWISE_CONV_2D, CircleDepthwiseConv2D)
CIRCLE_NODE(DEQUANTIZE, CircleDequantize)
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleDensify.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleDensify.h
new file mode 100644
index 000000000..7acad0341
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleDensify.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_DENSIFY_H__
+#define __LUCI_IR_CIRCLE_DENSIFY_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/CircleNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief DENSIFY in Circle
+ */
+class CircleDensify final : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::DENSIFY>>
+{
+public:
+ loco::Node *input(void) const { return at(0)->node(); }
+ void input(loco::Node *node) { at(0)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_DENSIFY_H__
diff --git a/compiler/luci/lang/src/Nodes/CircleConst.cpp b/compiler/luci/lang/src/Nodes/CircleConst.cpp
index c2d82c8a2..a4854ec59 100644
--- a/compiler/luci/lang/src/Nodes/CircleConst.cpp
+++ b/compiler/luci/lang/src/Nodes/CircleConst.cpp
@@ -77,6 +77,7 @@ INSTANTIATE(loco::DataType::S8);
INSTANTIATE(loco::DataType::FLOAT32);
INSTANTIATE(loco::DataType::U8);
INSTANTIATE(loco::DataType::BOOL);
+INSTANTIATE(loco::DataType::FLOAT16);
#undef INSTANTIATE
diff --git a/compiler/luci/lang/src/Nodes/CircleDensify.test.cpp b/compiler/luci/lang/src/Nodes/CircleDensify.test.cpp
new file mode 100644
index 000000000..ae83784a5
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleDensify.test.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleDensify.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleDensifyTest, constructor)
+{
+ luci::CircleDensify densify_node;
+
+ ASSERT_EQ(luci::CircleDialect::get(), densify_node.dialect());
+ ASSERT_EQ(luci::CircleOpcode::DENSIFY, densify_node.opcode());
+
+ ASSERT_EQ(nullptr, densify_node.input());
+}
+
+TEST(CircleDensifyTest, input_NEG)
+{
+ luci::CircleDensify densify_node;
+ luci::CircleDensify node;
+
+ densify_node.input(&node);
+ ASSERT_NE(nullptr, densify_node.input());
+
+ densify_node.input(nullptr);
+ ASSERT_EQ(nullptr, densify_node.input());
+}
+
+TEST(CircleDensifyTest, arity_NEG)
+{
+ luci::CircleDensify densify_node;
+
+ ASSERT_NO_THROW(densify_node.arg(0));
+ ASSERT_THROW(densify_node.arg(1), std::out_of_range);
+}
+
+TEST(CircleDensifyTest, visit_mutable_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+ {
+ };
+
+ luci::CircleDensify densify_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(densify_node.accept(&tv), std::exception);
+}
+
+TEST(CircleDensifyTest, visit_NEG)
+{
+ struct TestVisitor final : public luci::CircleNodeVisitor<void>
+ {
+ };
+
+ luci::CircleDensify densify_node;
+
+ TestVisitor tv;
+ ASSERT_THROW(densify_node.accept(&tv), std::exception);
+}
diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp b/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp
index eff0830b4..8409f250e 100644
--- a/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp
+++ b/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp
@@ -137,6 +137,7 @@ CircleNodeSummaryBuilder::create_builder(const luci::CircleNode *node)
CIRCLE_NODE(CONV_2D, CircleConv2DSummaryBuilder)
CIRCLE_NODE(COS, CircleCosSummaryBuilder)
CIRCLE_NODE(CUSTOM, CircleCustomSummaryBuilder)
+ CIRCLE_NODE(DENSIFY, CircleDensifySummaryBuilder)
CIRCLE_NODE(DEPTH_TO_SPACE, CircleDepthToSpaceSummaryBuilder)
CIRCLE_NODE(DEPTHWISE_CONV_2D, CircleDepthwiseConv2DSummaryBuilder)
CIRCLE_NODE(DEQUANTIZE, CircleDequantizeSummaryBuilder)
diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp b/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp
index 6df9270e3..48e4579ea 100644
--- a/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp
+++ b/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp
@@ -374,6 +374,22 @@ void CircleConcatenationSummaryBuilder::build_attributes(const luci::CircleNode
s.args().append("fused_activation_function", to_str(concat->fusedActivationFunction()));
}
+void CircleConstSummaryBuilder::build_attributes(const luci::CircleNode *node,
+ locop::NodeSummary &s)
+{
+ auto circonst = loco::must_cast<const luci::CircleConst *>(node);
+ s.args().append("dtype", to_str(circonst->dtype()));
+ s.args().append("rank", std::to_string(circonst->rank()));
+ std::string shape;
+ for (uint32_t r = 0; r < circonst->rank(); ++r)
+ {
+ if (!shape.empty())
+ shape += " ";
+ shape += std::to_string(circonst->dim(r).value());
+ }
+ s.args().append("shape", "[" + shape + "]");
+}
+
void CircleConstSummaryBuilder::update_status(locop::NodeSummary &s)
{
s.state(locop::NodeDesc::State::PartiallyKnown);
diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilders.h b/compiler/luci/logex/src/CircleNodeSummaryBuilders.h
index 6cd24b7f1..f0cac4e5e 100644
--- a/compiler/luci/logex/src/CircleNodeSummaryBuilders.h
+++ b/compiler/luci/logex/src/CircleNodeSummaryBuilders.h
@@ -167,6 +167,7 @@ private:
class CircleConstSummaryBuilder final : public CircleNodeSummaryBuilder
{
private:
+ void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
void update_status(locop::NodeSummary &s);
};
@@ -189,6 +190,10 @@ private:
void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
};
+class CircleDensifySummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
class CircleDepthToSpaceSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
{
private:
diff --git a/compiler/luci/partition/include/luci/ConnectNode.h b/compiler/luci/partition/include/luci/ConnectNode.h
new file mode 100644
index 000000000..2d9d41d77
--- /dev/null
+++ b/compiler/luci/partition/include/luci/ConnectNode.h
@@ -0,0 +1,219 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PARTITION_CONNECT_NODE_H__
+#define __LUCI_PARTITION_CONNECT_NODE_H__
+
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+/**
+ * @note MapNode2Clone is used as a map from original node to cloned node
+ * to find input of a cloned node
+ *
+ * (Original) (Clone)
+ *
+ * [A] [A']
+ * | [B] | [B']
+ * | | | |
+ * \ / \ /
+ * [C] [C']
+ *
+ * From view of [C'] we need to find [A'] and [B']. We know [C] from [C'],
+ * then we can get from input of [C] as [A], [B] then [A]->[A'] and [B]->[B']
+ * from the map.
+ */
+using MapNode2Clone = std::map<const CircleNode * /* ORG */, CircleNode * /* CLONE */>;
+
+struct CloneContext
+{
+ std::pair<MapNode2Clone::iterator, bool> emplace(const CircleNode *org, CircleNode *clone)
+ {
+ return node2clone.emplace(org, clone);
+ }
+ MapNode2Clone::iterator find(const CircleNode *org) { return node2clone.find(org); }
+ MapNode2Clone::iterator end(void) { return node2clone.end(); }
+
+ MapNode2Clone::const_iterator find(const CircleNode *org) const { return node2clone.find(org); }
+ MapNode2Clone::const_iterator end(void) const { return node2clone.end(); }
+
+ MapNode2Clone node2clone;
+};
+
+class ConnectNode final : public luci::CircleNodeVisitor<void>
+{
+public:
+ ConnectNode(luci::CloneContext &clonecontext) : _clonecontext(clonecontext){};
+
+public:
+ void visit(const luci::CircleAbs *) final;
+ void visit(const luci::CircleAdd *) final;
+ void visit(const luci::CircleAddN *) final;
+ void visit(const luci::CircleArgMax *) final;
+ void visit(const luci::CircleArgMin *) final;
+ void visit(const luci::CircleAveragePool2D *) final;
+ void visit(const luci::CircleBatchMatMul *) final;
+ void visit(const luci::CircleBatchToSpaceND *) final;
+ void visit(const luci::CircleCast *) final;
+ void visit(const luci::CircleCeil *) final;
+ void visit(const luci::CircleConcatenation *) final;
+ void visit(const luci::CircleConst *) final;
+ void visit(const luci::CircleConv2D *) final;
+ void visit(const luci::CircleCos *) final;
+ void visit(const luci::CircleCustom *) final;
+ void visit(const luci::CircleDensify *) final;
+ void visit(const luci::CircleDepthToSpace *) final;
+ void visit(const luci::CircleDepthwiseConv2D *) final;
+ void visit(const luci::CircleDequantize *) final;
+ void visit(const luci::CircleDiv *) final;
+ void visit(const luci::CircleElu *) final;
+ void visit(const luci::CircleEqual *) final;
+ void visit(const luci::CircleExp *) final;
+ void visit(const luci::CircleExpandDims *) final;
+ void visit(const luci::CircleFakeQuant *) final;
+ void visit(const luci::CircleFill *) final;
+ void visit(const luci::CircleFloor *) final;
+ void visit(const luci::CircleFloorDiv *) final;
+ void visit(const luci::CircleFloorMod *) final;
+ void visit(const luci::CircleFullyConnected *) final;
+ void visit(const luci::CircleGather *) final;
+ void visit(const luci::CircleGatherNd *) final;
+ void visit(const luci::CircleGreater *) final;
+ void visit(const luci::CircleGreaterEqual *) final;
+ void visit(const luci::CircleIf *) final;
+ void visit(const luci::CircleL2Normalize *) final;
+ void visit(const luci::CircleL2Pool2D *) final;
+ void visit(const luci::CircleLeakyRelu *) final;
+ void visit(const luci::CircleLess *) final;
+ void visit(const luci::CircleLessEqual *) final;
+ void visit(const luci::CircleLocalResponseNormalization *) final;
+ void visit(const luci::CircleLog *) final;
+ void visit(const luci::CircleLogicalAnd *) final;
+ void visit(const luci::CircleLogicalNot *) final;
+ void visit(const luci::CircleLogicalOr *) final;
+ void visit(const luci::CircleLogistic *) final;
+ void visit(const luci::CircleLogSoftmax *) final;
+ void visit(const luci::CircleMatrixDiag *) final;
+ void visit(const luci::CircleMatrixSetDiag *) final;
+ void visit(const luci::CircleMaximum *) final;
+ void visit(const luci::CircleMaxPool2D *) final;
+ void visit(const luci::CircleMean *) final;
+ void visit(const luci::CircleMinimum *) final;
+ void visit(const luci::CircleMirrorPad *) final;
+ void visit(const luci::CircleMul *) final;
+ void visit(const luci::CircleNeg *) final;
+ void visit(const luci::CircleNonMaxSuppressionV4 *) final;
+ void visit(const luci::CircleNonMaxSuppressionV5 *) final;
+ void visit(const luci::CircleNotEqual *) final;
+ void visit(const luci::CircleOneHot *) final;
+ void visit(const luci::CirclePack *) final;
+ void visit(const luci::CirclePad *) final;
+ void visit(const luci::CirclePadV2 *) final;
+ void visit(const luci::CirclePow *) final;
+ void visit(const luci::CirclePRelu *) final;
+ void visit(const luci::CircleQuantize *) final;
+ void visit(const luci::CircleRange *) final;
+ void visit(const luci::CircleRank *) final;
+ void visit(const luci::CircleReduceAny *) final;
+ void visit(const luci::CircleReduceMax *) final;
+ void visit(const luci::CircleReduceMin *) final;
+ void visit(const luci::CircleReduceProd *) final;
+ void visit(const luci::CircleRelu *) final;
+ void visit(const luci::CircleRelu6 *) final;
+ void visit(const luci::CircleReluN1To1 *) final;
+ void visit(const luci::CircleReshape *) final;
+ void visit(const luci::CircleResizeBilinear *) final;
+ void visit(const luci::CircleResizeNearestNeighbor *) final;
+ void visit(const luci::CircleReverseSequence *) final;
+ void visit(const luci::CircleReverseV2 *) final;
+ void visit(const luci::CircleRound *) final;
+ void visit(const luci::CircleRsqrt *) final;
+ void visit(const luci::CircleScatterNd *) final;
+ void visit(const luci::CircleSegmentSum *) final;
+ void visit(const luci::CircleSelect *) final;
+ void visit(const luci::CircleSelectV2 *) final;
+ void visit(const luci::CircleShape *) final;
+ void visit(const luci::CircleSin *) final;
+ void visit(const luci::CircleSlice *) final;
+ void visit(const luci::CircleSoftmax *) final;
+ void visit(const luci::CircleSpaceToBatchND *) final;
+ void visit(const luci::CircleSpaceToDepth *) final;
+ void visit(const luci::CircleSparseToDense *) final;
+ void visit(const luci::CircleSplit *) final;
+ void visit(const luci::CircleSplitV *) final;
+ void visit(const luci::CircleSqrt *) final;
+ void visit(const luci::CircleSquare *) final;
+ void visit(const luci::CircleSquaredDifference *) final;
+ void visit(const luci::CircleSqueeze *) final;
+ void visit(const luci::CircleStridedSlice *) final;
+ void visit(const luci::CircleSVDF *) final;
+ void visit(const luci::CircleSub *) final;
+ void visit(const luci::CircleSum *) final;
+ void visit(const luci::CircleTanh *) final;
+ void visit(const luci::CircleTile *) final;
+ void visit(const luci::CircleTopKV2 *) final;
+ void visit(const luci::CircleTranspose *) final;
+ void visit(const luci::CircleTransposeConv *) final;
+ void visit(const luci::CircleUnidirectionalSequenceLSTM *) final;
+ void visit(const luci::CircleUnique *) final;
+ void visit(const luci::CircleUnpack *) final;
+ void visit(const luci::CircleWhere *) final;
+ void visit(const luci::CircleWhile *) final;
+ void visit(const luci::CircleZerosLike *) final;
+
+ // Circle Only
+ void visit(const luci::CircleBCQFullyConnected *) final;
+ void visit(const luci::CircleBCQGather *) final;
+ void visit(const luci::CircleInstanceNorm *) final;
+
+ // NOTE CircleInput and CircleOutput are not handled here as these need
+ // link with graph I/O
+
+ // Virtual
+ void visit(const luci::CircleCustomOut *) final;
+ void visit(const luci::CircleIfOut *) final;
+ // void visit(const luci::CircleInput *) final;
+ void visit(const luci::CircleNonMaxSuppressionV4Out *) final;
+ void visit(const luci::CircleNonMaxSuppressionV5Out *) final;
+ // void visit(const luci::CircleOutput *) final;
+ void visit(const luci::CircleOutputDummy *) final;
+ void visit(const luci::CircleOutputExclude *) final;
+ void visit(const luci::CircleSplitOut *) final;
+ void visit(const luci::CircleSplitVOut *) final;
+ void visit(const luci::CircleTopKV2Out *) final;
+ void visit(const luci::CircleUniqueOut *) final;
+ void visit(const luci::CircleUnpackOut *) final;
+ void visit(const luci::CircleVariable *) final;
+ void visit(const luci::CircleWhileOut *) final;
+
+public:
+ luci::CircleNode *find_clone(const luci::CircleNode *node);
+
+protected:
+ luci::CloneContext &_clonecontext;
+};
+
+/**
+ * @brief Connect cloned node from input node
+ */
+void clone_connect(const luci::CircleNode *node, luci::CloneContext &clonecontext);
+
+} // namespace luci
+
+#endif // __LUCI_PARTITION_CONNECT_NODE_H__
diff --git a/compiler/luci/partition/src/ConnectNode.cpp b/compiler/luci/partition/src/ConnectNode.cpp
index 336be7c57..3d8c211c0 100644
--- a/compiler/luci/partition/src/ConnectNode.cpp
+++ b/compiler/luci/partition/src/ConnectNode.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include <oops/UserExn.h>
diff --git a/compiler/luci/partition/src/ConnectNode.h b/compiler/luci/partition/src/ConnectNode.h
deleted file mode 100644
index e60567c69..000000000
--- a/compiler/luci/partition/src/ConnectNode.h
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_PARTITION_CONNECT_NODE_H__
-#define __LUCI_PARTITION_CONNECT_NODE_H__
-
-#include <luci/IR/CircleNode.h>
-#include <luci/IR/CircleNodeVisitor.h>
-
-namespace luci
-{
-
-/**
- * @note MapNode2Clone is used as a map from original node to cloned node
- * to find input of a cloned node
- *
- * (Original) (Clone)
- *
- * [A] [A']
- * | [B] | [B']
- * | | | |
- * \ / \ /
- * [C] [C']
- *
- * From view of [C'] we need to find [A'] and [B']. We know [C] from [C'],
- * then we can get from input of [C] as [A], [B] then [A]->[A'] and [B]->[B']
- * from the map.
- */
-using MapNode2Clone = std::map<const CircleNode * /* ORG */, CircleNode * /* CLONE */>;
-
-struct CloneContext
-{
- std::pair<MapNode2Clone::iterator, bool> emplace(const CircleNode *org, CircleNode *clone)
- {
- return node2clone.emplace(org, clone);
- }
- MapNode2Clone::iterator find(const CircleNode *org) { return node2clone.find(org); }
- MapNode2Clone::iterator end(void) { return node2clone.end(); }
-
- MapNode2Clone::const_iterator find(const CircleNode *org) const { return node2clone.find(org); }
- MapNode2Clone::const_iterator end(void) const { return node2clone.end(); }
-
- MapNode2Clone node2clone;
-};
-
-class ConnectNode final : public luci::CircleNodeVisitor<void>
-{
-public:
- ConnectNode(luci::CloneContext &clonecontext) : _clonecontext(clonecontext){};
-
-public:
- void visit(const luci::CircleAbs *) final;
- void visit(const luci::CircleAdd *) final;
- void visit(const luci::CircleAddN *) final;
- void visit(const luci::CircleArgMax *) final;
- void visit(const luci::CircleArgMin *) final;
- void visit(const luci::CircleAveragePool2D *) final;
- void visit(const luci::CircleBatchMatMul *) final;
- void visit(const luci::CircleBatchToSpaceND *) final;
- void visit(const luci::CircleCast *) final;
- void visit(const luci::CircleCeil *) final;
- void visit(const luci::CircleConcatenation *) final;
- void visit(const luci::CircleConst *) final;
- void visit(const luci::CircleConv2D *) final;
- void visit(const luci::CircleCos *) final;
- void visit(const luci::CircleCustom *) final;
- void visit(const luci::CircleDepthToSpace *) final;
- void visit(const luci::CircleDepthwiseConv2D *) final;
- void visit(const luci::CircleDequantize *) final;
- void visit(const luci::CircleDiv *) final;
- void visit(const luci::CircleElu *) final;
- void visit(const luci::CircleEqual *) final;
- void visit(const luci::CircleExp *) final;
- void visit(const luci::CircleExpandDims *) final;
- void visit(const luci::CircleFakeQuant *) final;
- void visit(const luci::CircleFill *) final;
- void visit(const luci::CircleFloor *) final;
- void visit(const luci::CircleFloorDiv *) final;
- void visit(const luci::CircleFloorMod *) final;
- void visit(const luci::CircleFullyConnected *) final;
- void visit(const luci::CircleGather *) final;
- void visit(const luci::CircleGatherNd *) final;
- void visit(const luci::CircleGreater *) final;
- void visit(const luci::CircleGreaterEqual *) final;
- void visit(const luci::CircleIf *) final;
- void visit(const luci::CircleL2Normalize *) final;
- void visit(const luci::CircleL2Pool2D *) final;
- void visit(const luci::CircleLeakyRelu *) final;
- void visit(const luci::CircleLess *) final;
- void visit(const luci::CircleLessEqual *) final;
- void visit(const luci::CircleLocalResponseNormalization *) final;
- void visit(const luci::CircleLog *) final;
- void visit(const luci::CircleLogicalAnd *) final;
- void visit(const luci::CircleLogicalNot *) final;
- void visit(const luci::CircleLogicalOr *) final;
- void visit(const luci::CircleLogistic *) final;
- void visit(const luci::CircleLogSoftmax *) final;
- void visit(const luci::CircleMatrixDiag *) final;
- void visit(const luci::CircleMatrixSetDiag *) final;
- void visit(const luci::CircleMaximum *) final;
- void visit(const luci::CircleMaxPool2D *) final;
- void visit(const luci::CircleMean *) final;
- void visit(const luci::CircleMinimum *) final;
- void visit(const luci::CircleMirrorPad *) final;
- void visit(const luci::CircleMul *) final;
- void visit(const luci::CircleNeg *) final;
- void visit(const luci::CircleNonMaxSuppressionV4 *) final;
- void visit(const luci::CircleNonMaxSuppressionV5 *) final;
- void visit(const luci::CircleNotEqual *) final;
- void visit(const luci::CircleOneHot *) final;
- void visit(const luci::CirclePack *) final;
- void visit(const luci::CirclePad *) final;
- void visit(const luci::CirclePadV2 *) final;
- void visit(const luci::CirclePow *) final;
- void visit(const luci::CirclePRelu *) final;
- void visit(const luci::CircleQuantize *) final;
- void visit(const luci::CircleRange *) final;
- void visit(const luci::CircleRank *) final;
- void visit(const luci::CircleReduceAny *) final;
- void visit(const luci::CircleReduceMax *) final;
- void visit(const luci::CircleReduceMin *) final;
- void visit(const luci::CircleReduceProd *) final;
- void visit(const luci::CircleRelu *) final;
- void visit(const luci::CircleRelu6 *) final;
- void visit(const luci::CircleReluN1To1 *) final;
- void visit(const luci::CircleReshape *) final;
- void visit(const luci::CircleResizeBilinear *) final;
- void visit(const luci::CircleResizeNearestNeighbor *) final;
- void visit(const luci::CircleReverseSequence *) final;
- void visit(const luci::CircleReverseV2 *) final;
- void visit(const luci::CircleRound *) final;
- void visit(const luci::CircleRsqrt *) final;
- void visit(const luci::CircleScatterNd *) final;
- void visit(const luci::CircleSegmentSum *) final;
- void visit(const luci::CircleSelect *) final;
- void visit(const luci::CircleSelectV2 *) final;
- void visit(const luci::CircleShape *) final;
- void visit(const luci::CircleSin *) final;
- void visit(const luci::CircleSlice *) final;
- void visit(const luci::CircleSoftmax *) final;
- void visit(const luci::CircleSpaceToBatchND *) final;
- void visit(const luci::CircleSpaceToDepth *) final;
- void visit(const luci::CircleSparseToDense *) final;
- void visit(const luci::CircleSplit *) final;
- void visit(const luci::CircleSplitV *) final;
- void visit(const luci::CircleSqrt *) final;
- void visit(const luci::CircleSquare *) final;
- void visit(const luci::CircleSquaredDifference *) final;
- void visit(const luci::CircleSqueeze *) final;
- void visit(const luci::CircleStridedSlice *) final;
- void visit(const luci::CircleSVDF *) final;
- void visit(const luci::CircleSub *) final;
- void visit(const luci::CircleSum *) final;
- void visit(const luci::CircleTanh *) final;
- void visit(const luci::CircleTile *) final;
- void visit(const luci::CircleTopKV2 *) final;
- void visit(const luci::CircleTranspose *) final;
- void visit(const luci::CircleTransposeConv *) final;
- void visit(const luci::CircleUnidirectionalSequenceLSTM *) final;
- void visit(const luci::CircleUnique *) final;
- void visit(const luci::CircleUnpack *) final;
- void visit(const luci::CircleWhere *) final;
- void visit(const luci::CircleWhile *) final;
- void visit(const luci::CircleZerosLike *) final;
-
- // Circle Only
- void visit(const luci::CircleBCQFullyConnected *) final;
- void visit(const luci::CircleBCQGather *) final;
- void visit(const luci::CircleInstanceNorm *) final;
-
- // NOTE CircleInput and CircleOutput are not handled here as these need
- // link with graph I/O
-
- // Virtual
- void visit(const luci::CircleCustomOut *) final;
- void visit(const luci::CircleIfOut *) final;
- // void visit(const luci::CircleInput *) final;
- void visit(const luci::CircleNonMaxSuppressionV4Out *) final;
- void visit(const luci::CircleNonMaxSuppressionV5Out *) final;
- // void visit(const luci::CircleOutput *) final;
- void visit(const luci::CircleOutputDummy *) final;
- void visit(const luci::CircleOutputExclude *) final;
- void visit(const luci::CircleSplitOut *) final;
- void visit(const luci::CircleSplitVOut *) final;
- void visit(const luci::CircleTopKV2Out *) final;
- void visit(const luci::CircleUniqueOut *) final;
- void visit(const luci::CircleUnpackOut *) final;
- void visit(const luci::CircleVariable *) final;
- void visit(const luci::CircleWhileOut *) final;
-
-public:
- luci::CircleNode *find_clone(const luci::CircleNode *node);
-
-protected:
- luci::CloneContext &_clonecontext;
-};
-
-/**
- * @brief Connect cloned node from input node
- */
-void clone_connect(const luci::CircleNode *node, luci::CloneContext &clonecontext);
-
-} // namespace luci
-
-#endif // __LUCI_PARTITION_CONNECT_NODE_H__
diff --git a/compiler/luci/partition/src/ConnectNode.test.h b/compiler/luci/partition/src/ConnectNode.test.h
index ac4878a15..18bb52a20 100644
--- a/compiler/luci/partition/src/ConnectNode.test.h
+++ b/compiler/luci/partition/src/ConnectNode.test.h
@@ -17,7 +17,7 @@
#ifndef __CONNECT_NODE_TEST_H__
#define __CONNECT_NODE_TEST_H__
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include <luci/Service/CircleNodeClone.h>
#include <luci/test/TestIOGraph.h>
diff --git a/compiler/luci/partition/src/Nodes/CircleAbs.cpp b/compiler/luci/partition/src/Nodes/CircleAbs.cpp
index a3fde4c45..a7fbc37d1 100644
--- a/compiler/luci/partition/src/Nodes/CircleAbs.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleAbs.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleAbs.test.cpp b/compiler/luci/partition/src/Nodes/CircleAbs.test.cpp
index f3e721525..ac805c1af 100644
--- a/compiler/luci/partition/src/Nodes/CircleAbs.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleAbs.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleAdd.cpp b/compiler/luci/partition/src/Nodes/CircleAdd.cpp
index d393997e9..0754be626 100644
--- a/compiler/luci/partition/src/Nodes/CircleAdd.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleAdd.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleAdd.test.cpp b/compiler/luci/partition/src/Nodes/CircleAdd.test.cpp
index e457b83d2..99ae52c54 100644
--- a/compiler/luci/partition/src/Nodes/CircleAdd.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleAdd.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleAddN.cpp b/compiler/luci/partition/src/Nodes/CircleAddN.cpp
index 81e5e0949..90aaeee3a 100644
--- a/compiler/luci/partition/src/Nodes/CircleAddN.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleAddN.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleAddN.test.cpp b/compiler/luci/partition/src/Nodes/CircleAddN.test.cpp
index 5d0a7489f..37743d3a3 100644
--- a/compiler/luci/partition/src/Nodes/CircleAddN.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleAddN.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleArgMax.cpp b/compiler/luci/partition/src/Nodes/CircleArgMax.cpp
index 1409586d7..99b30d38f 100644
--- a/compiler/luci/partition/src/Nodes/CircleArgMax.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleArgMax.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleArgMax.test.cpp b/compiler/luci/partition/src/Nodes/CircleArgMax.test.cpp
index c816fbeb8..77248e07e 100644
--- a/compiler/luci/partition/src/Nodes/CircleArgMax.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleArgMax.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleArgMin.cpp b/compiler/luci/partition/src/Nodes/CircleArgMin.cpp
index 6151aa98a..1bb3d84e7 100644
--- a/compiler/luci/partition/src/Nodes/CircleArgMin.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleArgMin.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleArgMin.test.cpp b/compiler/luci/partition/src/Nodes/CircleArgMin.test.cpp
index d150be4d6..ed0cf030c 100644
--- a/compiler/luci/partition/src/Nodes/CircleArgMin.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleArgMin.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleAveragePool2D.cpp b/compiler/luci/partition/src/Nodes/CircleAveragePool2D.cpp
index 547665771..1df86c7be 100644
--- a/compiler/luci/partition/src/Nodes/CircleAveragePool2D.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleAveragePool2D.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleAveragePool2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleAveragePool2D.test.cpp
index fba2be835..266120b92 100644
--- a/compiler/luci/partition/src/Nodes/CircleAveragePool2D.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleAveragePool2D.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.cpp b/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.cpp
index 5b1dd8543..6d50f0e31 100644
--- a/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.test.cpp b/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.test.cpp
index 3d64f4b29..2191f5b0a 100644
--- a/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleBCQFullyConnected.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleBCQGather.cpp b/compiler/luci/partition/src/Nodes/CircleBCQGather.cpp
index 90c4d9ef3..a9e810a27 100644
--- a/compiler/luci/partition/src/Nodes/CircleBCQGather.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleBCQGather.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleBCQGather.test.cpp b/compiler/luci/partition/src/Nodes/CircleBCQGather.test.cpp
index bbbd3f157..0324d85e0 100644
--- a/compiler/luci/partition/src/Nodes/CircleBCQGather.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleBCQGather.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleBatchMatMul.cpp b/compiler/luci/partition/src/Nodes/CircleBatchMatMul.cpp
index c3992a64e..5a459e78c 100644
--- a/compiler/luci/partition/src/Nodes/CircleBatchMatMul.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleBatchMatMul.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleBatchMatMul.test.cpp b/compiler/luci/partition/src/Nodes/CircleBatchMatMul.test.cpp
index 94336d36a..e6d26a6a1 100644
--- a/compiler/luci/partition/src/Nodes/CircleBatchMatMul.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleBatchMatMul.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.cpp b/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.cpp
index 2a463afb1..40b8f7052 100644
--- a/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.test.cpp b/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.test.cpp
index 544f5e127..e9cb350b8 100644
--- a/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleBatchToSpaceND.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleCast.cpp b/compiler/luci/partition/src/Nodes/CircleCast.cpp
index f7630cd85..e1301aa06 100644
--- a/compiler/luci/partition/src/Nodes/CircleCast.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleCast.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleCast.test.cpp b/compiler/luci/partition/src/Nodes/CircleCast.test.cpp
index 005119060..d7b679aa2 100644
--- a/compiler/luci/partition/src/Nodes/CircleCast.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleCast.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleCeil.cpp b/compiler/luci/partition/src/Nodes/CircleCeil.cpp
index a0c94033e..e7b5f5a3f 100644
--- a/compiler/luci/partition/src/Nodes/CircleCeil.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleCeil.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleCeil.test.cpp b/compiler/luci/partition/src/Nodes/CircleCeil.test.cpp
index dbd7e5390..cb0364844 100644
--- a/compiler/luci/partition/src/Nodes/CircleCeil.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleCeil.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleConcatenation.cpp b/compiler/luci/partition/src/Nodes/CircleConcatenation.cpp
index fb24d21ca..d895685f0 100644
--- a/compiler/luci/partition/src/Nodes/CircleConcatenation.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleConcatenation.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleConcatenation.test.cpp b/compiler/luci/partition/src/Nodes/CircleConcatenation.test.cpp
index 4d64b85a2..b5c05e25d 100644
--- a/compiler/luci/partition/src/Nodes/CircleConcatenation.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleConcatenation.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleConst.cpp b/compiler/luci/partition/src/Nodes/CircleConst.cpp
index 118cd8de2..b88f5ef4e 100644
--- a/compiler/luci/partition/src/Nodes/CircleConst.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleConst.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace luci
{
diff --git a/compiler/luci/partition/src/Nodes/CircleConv2D.cpp b/compiler/luci/partition/src/Nodes/CircleConv2D.cpp
index 46716f0ec..ca9cce18f 100644
--- a/compiler/luci/partition/src/Nodes/CircleConv2D.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleConv2D.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleConv2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleConv2D.test.cpp
index 829adec9b..4596d9618 100644
--- a/compiler/luci/partition/src/Nodes/CircleConv2D.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleConv2D.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleCos.cpp b/compiler/luci/partition/src/Nodes/CircleCos.cpp
index 9dcf81e83..76b1baac3 100644
--- a/compiler/luci/partition/src/Nodes/CircleCos.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleCos.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleCos.test.cpp b/compiler/luci/partition/src/Nodes/CircleCos.test.cpp
index 6c92b93fb..ba806a3f9 100644
--- a/compiler/luci/partition/src/Nodes/CircleCos.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleCos.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleCustom.cpp b/compiler/luci/partition/src/Nodes/CircleCustom.cpp
index ac16ebe40..cc1604876 100644
--- a/compiler/luci/partition/src/Nodes/CircleCustom.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleCustom.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleCustom.test.cpp b/compiler/luci/partition/src/Nodes/CircleCustom.test.cpp
index 9f40b5220..f7fe86674 100644
--- a/compiler/luci/partition/src/Nodes/CircleCustom.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleCustom.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleCustomOut.cpp b/compiler/luci/partition/src/Nodes/CircleCustomOut.cpp
index fee1a1a8c..0d83cffaa 100644
--- a/compiler/luci/partition/src/Nodes/CircleCustomOut.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleCustomOut.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleCustomOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleCustomOut.test.cpp
index 0a293970e..ddd4e93f2 100644
--- a/compiler/luci/partition/src/Nodes/CircleCustomOut.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleCustomOut.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleDensify.cpp b/compiler/luci/partition/src/Nodes/CircleDensify.cpp
new file mode 100644
index 000000000..cfb236a5d
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleDensify.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleDensify *node)
+{
+ auto *cloned = loco::must_cast<luci::CircleDensify *>(cn->find_clone(node));
+
+ luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+
+ cloned->input(cn->find_clone(input));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleDensify *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleDensify.test.cpp b/compiler/luci/partition/src/Nodes/CircleDensify.test.cpp
new file mode 100644
index 000000000..94076a8db
--- /dev/null
+++ b/compiler/luci/partition/src/Nodes/CircleDensify.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleDensify>
+{
+public:
+ NodeGraphlet() = default;
+};
+
+class TestNodeGraph : public TestIOGraph, public NodeGraphlet
+{
+public:
+ TestNodeGraph() = default;
+
+public:
+ void init(const ShapeU32 shape)
+ {
+ TestIOGraph::init(shape, shape);
+ NodeGraphlet::init(g());
+
+ node()->input(input());
+
+ output()->from(node());
+ }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_Densify)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDensify *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDensify *>(clone));
+
+ cth.clone_connect(node, clone);
+
+ ASSERT_EQ(1, clone->arity());
+ ASSERT_EQ(cth.inputs(0), clone->arg(0));
+}
+
+TEST(ConnectNodeTest, connect_Densify_NEG)
+{
+ TestNodeGraph tng;
+ tng.init({2, 3});
+
+ ConnectionTestHelper cth;
+ cth.prepare_inputs_miss(&tng);
+
+ auto *node = tng.node();
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDensify *>(node));
+
+ auto *clone = luci::clone_node(node, cth.graph_clone());
+ ASSERT_NO_THROW(loco::must_cast<luci::CircleDensify *>(clone));
+
+ EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleDepthToSpace.cpp b/compiler/luci/partition/src/Nodes/CircleDepthToSpace.cpp
index ade266e41..c044b4c42 100644
--- a/compiler/luci/partition/src/Nodes/CircleDepthToSpace.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleDepthToSpace.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleDepthToSpace.test.cpp b/compiler/luci/partition/src/Nodes/CircleDepthToSpace.test.cpp
index 997360a9b..1b61a3517 100644
--- a/compiler/luci/partition/src/Nodes/CircleDepthToSpace.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleDepthToSpace.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.cpp b/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.cpp
index 19d1d5f42..2bd9ab5ca 100644
--- a/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.test.cpp
index 681f98bdb..02976a488 100644
--- a/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleDepthwiseConv2D.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleDequantize.cpp b/compiler/luci/partition/src/Nodes/CircleDequantize.cpp
index 3a520d4e9..ac2642bc1 100644
--- a/compiler/luci/partition/src/Nodes/CircleDequantize.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleDequantize.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleDequantize.test.cpp b/compiler/luci/partition/src/Nodes/CircleDequantize.test.cpp
index 7f6006c1d..d3a43d374 100644
--- a/compiler/luci/partition/src/Nodes/CircleDequantize.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleDequantize.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleDiv.cpp b/compiler/luci/partition/src/Nodes/CircleDiv.cpp
index 480338542..8941a4196 100644
--- a/compiler/luci/partition/src/Nodes/CircleDiv.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleDiv.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleDiv.test.cpp b/compiler/luci/partition/src/Nodes/CircleDiv.test.cpp
index 226932337..7900beafc 100644
--- a/compiler/luci/partition/src/Nodes/CircleDiv.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleDiv.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleElu.cpp b/compiler/luci/partition/src/Nodes/CircleElu.cpp
index d21cd4c01..b77226574 100644
--- a/compiler/luci/partition/src/Nodes/CircleElu.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleElu.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleElu.test.cpp b/compiler/luci/partition/src/Nodes/CircleElu.test.cpp
index 94774cca8..20b205048 100644
--- a/compiler/luci/partition/src/Nodes/CircleElu.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleElu.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleEqual.cpp b/compiler/luci/partition/src/Nodes/CircleEqual.cpp
index 6a126c0e2..2dc0e759b 100644
--- a/compiler/luci/partition/src/Nodes/CircleEqual.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleEqual.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleEqual.test.cpp b/compiler/luci/partition/src/Nodes/CircleEqual.test.cpp
index 20b539199..c0d3bd915 100644
--- a/compiler/luci/partition/src/Nodes/CircleEqual.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleEqual.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleExp.cpp b/compiler/luci/partition/src/Nodes/CircleExp.cpp
index 95fb1cd67..c1da7908a 100644
--- a/compiler/luci/partition/src/Nodes/CircleExp.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleExp.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleExp.test.cpp b/compiler/luci/partition/src/Nodes/CircleExp.test.cpp
index 16d7244ab..286f205bf 100644
--- a/compiler/luci/partition/src/Nodes/CircleExp.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleExp.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleExpandDims.cpp b/compiler/luci/partition/src/Nodes/CircleExpandDims.cpp
index 6fccd6310..a6ce6495c 100644
--- a/compiler/luci/partition/src/Nodes/CircleExpandDims.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleExpandDims.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleExpandDims.test.cpp b/compiler/luci/partition/src/Nodes/CircleExpandDims.test.cpp
index 8a5156509..37af10f52 100644
--- a/compiler/luci/partition/src/Nodes/CircleExpandDims.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleExpandDims.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleFakeQuant.cpp b/compiler/luci/partition/src/Nodes/CircleFakeQuant.cpp
index 4855d80ae..5dfaee1b5 100644
--- a/compiler/luci/partition/src/Nodes/CircleFakeQuant.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFakeQuant.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleFakeQuant.test.cpp b/compiler/luci/partition/src/Nodes/CircleFakeQuant.test.cpp
index 3821d755a..2a2ec0cff 100644
--- a/compiler/luci/partition/src/Nodes/CircleFakeQuant.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFakeQuant.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleFill.cpp b/compiler/luci/partition/src/Nodes/CircleFill.cpp
index 06fca7b41..32688cd9b 100644
--- a/compiler/luci/partition/src/Nodes/CircleFill.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFill.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleFill.test.cpp b/compiler/luci/partition/src/Nodes/CircleFill.test.cpp
index 97a5a348d..4b3872a80 100644
--- a/compiler/luci/partition/src/Nodes/CircleFill.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFill.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleFloor.cpp b/compiler/luci/partition/src/Nodes/CircleFloor.cpp
index 7ad392461..f7409a221 100644
--- a/compiler/luci/partition/src/Nodes/CircleFloor.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFloor.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleFloor.test.cpp b/compiler/luci/partition/src/Nodes/CircleFloor.test.cpp
index 1a964ea21..883d36256 100644
--- a/compiler/luci/partition/src/Nodes/CircleFloor.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFloor.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleFloorDiv.cpp b/compiler/luci/partition/src/Nodes/CircleFloorDiv.cpp
index 3b92b00c6..57e435c23 100644
--- a/compiler/luci/partition/src/Nodes/CircleFloorDiv.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFloorDiv.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleFloorDiv.test.cpp b/compiler/luci/partition/src/Nodes/CircleFloorDiv.test.cpp
index 3d2801566..1eb603c5d 100644
--- a/compiler/luci/partition/src/Nodes/CircleFloorDiv.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFloorDiv.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleFloorMod.cpp b/compiler/luci/partition/src/Nodes/CircleFloorMod.cpp
index 9f868d0e5..1b942d200 100644
--- a/compiler/luci/partition/src/Nodes/CircleFloorMod.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFloorMod.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleFloorMod.test.cpp b/compiler/luci/partition/src/Nodes/CircleFloorMod.test.cpp
index 89a09411b..680bf1680 100644
--- a/compiler/luci/partition/src/Nodes/CircleFloorMod.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFloorMod.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleFullyConnected.cpp b/compiler/luci/partition/src/Nodes/CircleFullyConnected.cpp
index da273037a..206b47aec 100644
--- a/compiler/luci/partition/src/Nodes/CircleFullyConnected.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFullyConnected.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleFullyConnected.test.cpp b/compiler/luci/partition/src/Nodes/CircleFullyConnected.test.cpp
index fc88204bd..39eea5571 100644
--- a/compiler/luci/partition/src/Nodes/CircleFullyConnected.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleFullyConnected.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleGather.cpp b/compiler/luci/partition/src/Nodes/CircleGather.cpp
index 0ee458394..4f059cbe4 100644
--- a/compiler/luci/partition/src/Nodes/CircleGather.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleGather.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleGather.test.cpp b/compiler/luci/partition/src/Nodes/CircleGather.test.cpp
index 7f4e08435..f427e0456 100644
--- a/compiler/luci/partition/src/Nodes/CircleGather.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleGather.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleGatherNd.cpp b/compiler/luci/partition/src/Nodes/CircleGatherNd.cpp
index 4be05ca94..6a9c3b47f 100644
--- a/compiler/luci/partition/src/Nodes/CircleGatherNd.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleGatherNd.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleGatherNd.test.cpp b/compiler/luci/partition/src/Nodes/CircleGatherNd.test.cpp
index d673698e1..0207e917d 100644
--- a/compiler/luci/partition/src/Nodes/CircleGatherNd.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleGatherNd.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleGreater.cpp b/compiler/luci/partition/src/Nodes/CircleGreater.cpp
index 7bc2a14c9..9f4b18fde 100644
--- a/compiler/luci/partition/src/Nodes/CircleGreater.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleGreater.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleGreater.test.cpp b/compiler/luci/partition/src/Nodes/CircleGreater.test.cpp
index 842370d42..61d1f5957 100644
--- a/compiler/luci/partition/src/Nodes/CircleGreater.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleGreater.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleGreaterEqual.cpp b/compiler/luci/partition/src/Nodes/CircleGreaterEqual.cpp
index 536a0aed6..76130a843 100644
--- a/compiler/luci/partition/src/Nodes/CircleGreaterEqual.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleGreaterEqual.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleGreaterEqual.test.cpp b/compiler/luci/partition/src/Nodes/CircleGreaterEqual.test.cpp
index 76dc770f8..7e4e1ef74 100644
--- a/compiler/luci/partition/src/Nodes/CircleGreaterEqual.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleGreaterEqual.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleIf.cpp b/compiler/luci/partition/src/Nodes/CircleIf.cpp
index 1672a136d..45e4ec48b 100644
--- a/compiler/luci/partition/src/Nodes/CircleIf.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleIf.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleIf.test.cpp b/compiler/luci/partition/src/Nodes/CircleIf.test.cpp
index dbd25c822..cbb766221 100644
--- a/compiler/luci/partition/src/Nodes/CircleIf.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleIf.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleIfOut.cpp b/compiler/luci/partition/src/Nodes/CircleIfOut.cpp
index 969bdd93c..2eb5dda1f 100644
--- a/compiler/luci/partition/src/Nodes/CircleIfOut.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleIfOut.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleIfOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleIfOut.test.cpp
index 9207654bc..ec2dde3b2 100644
--- a/compiler/luci/partition/src/Nodes/CircleIfOut.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleIfOut.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleInstanceNorm.cpp b/compiler/luci/partition/src/Nodes/CircleInstanceNorm.cpp
index 386652fb1..f64ffd8b4 100644
--- a/compiler/luci/partition/src/Nodes/CircleInstanceNorm.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleInstanceNorm.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleInstanceNorm.test.cpp b/compiler/luci/partition/src/Nodes/CircleInstanceNorm.test.cpp
index b932223d0..4363c6c18 100644
--- a/compiler/luci/partition/src/Nodes/CircleInstanceNorm.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleInstanceNorm.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleL2Normalize.cpp b/compiler/luci/partition/src/Nodes/CircleL2Normalize.cpp
index 61ddba264..df26930ec 100644
--- a/compiler/luci/partition/src/Nodes/CircleL2Normalize.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleL2Normalize.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleL2Normalize.test.cpp b/compiler/luci/partition/src/Nodes/CircleL2Normalize.test.cpp
index 4fc23727a..b114a15f0 100644
--- a/compiler/luci/partition/src/Nodes/CircleL2Normalize.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleL2Normalize.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleL2Pool2D.cpp b/compiler/luci/partition/src/Nodes/CircleL2Pool2D.cpp
index 24333d507..1eacddb62 100644
--- a/compiler/luci/partition/src/Nodes/CircleL2Pool2D.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleL2Pool2D.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleL2Pool2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleL2Pool2D.test.cpp
index 40328488c..22f99d5ef 100644
--- a/compiler/luci/partition/src/Nodes/CircleL2Pool2D.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleL2Pool2D.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleLeakyRelu.cpp b/compiler/luci/partition/src/Nodes/CircleLeakyRelu.cpp
index 3da1ba287..1702ddeb1 100644
--- a/compiler/luci/partition/src/Nodes/CircleLeakyRelu.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLeakyRelu.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleLeakyRelu.test.cpp b/compiler/luci/partition/src/Nodes/CircleLeakyRelu.test.cpp
index 5a0d1dd87..71dc55ea0 100644
--- a/compiler/luci/partition/src/Nodes/CircleLeakyRelu.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLeakyRelu.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleLess.cpp b/compiler/luci/partition/src/Nodes/CircleLess.cpp
index aab495fcc..52726f9be 100644
--- a/compiler/luci/partition/src/Nodes/CircleLess.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLess.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleLess.test.cpp b/compiler/luci/partition/src/Nodes/CircleLess.test.cpp
index ab65e5d18..c5d194efe 100644
--- a/compiler/luci/partition/src/Nodes/CircleLess.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLess.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleLessEqual.cpp b/compiler/luci/partition/src/Nodes/CircleLessEqual.cpp
index ec129dbe8..e9a3c412b 100644
--- a/compiler/luci/partition/src/Nodes/CircleLessEqual.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLessEqual.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleLessEqual.test.cpp b/compiler/luci/partition/src/Nodes/CircleLessEqual.test.cpp
index 0dd8986b6..29f4ababa 100644
--- a/compiler/luci/partition/src/Nodes/CircleLessEqual.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLessEqual.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.cpp b/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.cpp
index 6b0d1cd12..7a00bf94f 100644
--- a/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.test.cpp b/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.test.cpp
index e1973387d..5e5723817 100644
--- a/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLocalResponseNormalization.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleLog.cpp b/compiler/luci/partition/src/Nodes/CircleLog.cpp
index c43570fa2..676d22fc0 100644
--- a/compiler/luci/partition/src/Nodes/CircleLog.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLog.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleLog.test.cpp b/compiler/luci/partition/src/Nodes/CircleLog.test.cpp
index 8a43f6f01..0a2b97538 100644
--- a/compiler/luci/partition/src/Nodes/CircleLog.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLog.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleLogSoftmax.cpp b/compiler/luci/partition/src/Nodes/CircleLogSoftmax.cpp
index de582c80d..c67b08f0f 100644
--- a/compiler/luci/partition/src/Nodes/CircleLogSoftmax.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLogSoftmax.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleLogSoftmax.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogSoftmax.test.cpp
index 1e60bf54c..b6daeb781 100644
--- a/compiler/luci/partition/src/Nodes/CircleLogSoftmax.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLogSoftmax.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalAnd.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalAnd.cpp
index 28e8f42e5..1498d85ec 100644
--- a/compiler/luci/partition/src/Nodes/CircleLogicalAnd.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLogicalAnd.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalAnd.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalAnd.test.cpp
index a1189f06f..0b9513626 100644
--- a/compiler/luci/partition/src/Nodes/CircleLogicalAnd.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLogicalAnd.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalNot.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalNot.cpp
index e2657824c..f9c077e4e 100644
--- a/compiler/luci/partition/src/Nodes/CircleLogicalNot.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLogicalNot.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalNot.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalNot.test.cpp
index f6b34596e..88dff3651 100644
--- a/compiler/luci/partition/src/Nodes/CircleLogicalNot.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLogicalNot.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalOr.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalOr.cpp
index 418dc023b..59592e41d 100644
--- a/compiler/luci/partition/src/Nodes/CircleLogicalOr.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLogicalOr.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleLogicalOr.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogicalOr.test.cpp
index fee3f4779..35f8029c0 100644
--- a/compiler/luci/partition/src/Nodes/CircleLogicalOr.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLogicalOr.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleLogistic.cpp b/compiler/luci/partition/src/Nodes/CircleLogistic.cpp
index 7d788512d..804597bed 100644
--- a/compiler/luci/partition/src/Nodes/CircleLogistic.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLogistic.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleLogistic.test.cpp b/compiler/luci/partition/src/Nodes/CircleLogistic.test.cpp
index c4b3f7fe3..241d84040 100644
--- a/compiler/luci/partition/src/Nodes/CircleLogistic.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleLogistic.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleMatrixDiag.cpp b/compiler/luci/partition/src/Nodes/CircleMatrixDiag.cpp
index e92806aff..297e9f2cc 100644
--- a/compiler/luci/partition/src/Nodes/CircleMatrixDiag.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMatrixDiag.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleMatrixDiag.test.cpp b/compiler/luci/partition/src/Nodes/CircleMatrixDiag.test.cpp
index 03e3c3c3e..472cab8c8 100644
--- a/compiler/luci/partition/src/Nodes/CircleMatrixDiag.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMatrixDiag.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.cpp b/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.cpp
index 29bb7fe5f..b327aacad 100644
--- a/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.test.cpp b/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.test.cpp
index 5503ea18f..4ff797c43 100644
--- a/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMatrixSetDiag.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleMaxPool2D.cpp b/compiler/luci/partition/src/Nodes/CircleMaxPool2D.cpp
index 75a665aee..dee90e5c0 100644
--- a/compiler/luci/partition/src/Nodes/CircleMaxPool2D.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMaxPool2D.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleMaxPool2D.test.cpp b/compiler/luci/partition/src/Nodes/CircleMaxPool2D.test.cpp
index 16996497a..949e0d724 100644
--- a/compiler/luci/partition/src/Nodes/CircleMaxPool2D.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMaxPool2D.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleMaximum.cpp b/compiler/luci/partition/src/Nodes/CircleMaximum.cpp
index 2ba6055b4..459917e3e 100644
--- a/compiler/luci/partition/src/Nodes/CircleMaximum.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMaximum.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleMaximum.test.cpp b/compiler/luci/partition/src/Nodes/CircleMaximum.test.cpp
index 370174c37..e6a6d5741 100644
--- a/compiler/luci/partition/src/Nodes/CircleMaximum.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMaximum.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleMean.cpp b/compiler/luci/partition/src/Nodes/CircleMean.cpp
index b634e5838..c704d0054 100644
--- a/compiler/luci/partition/src/Nodes/CircleMean.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMean.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleMean.test.cpp b/compiler/luci/partition/src/Nodes/CircleMean.test.cpp
index 53435d9dc..838d7aea2 100644
--- a/compiler/luci/partition/src/Nodes/CircleMean.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMean.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleMinimum.cpp b/compiler/luci/partition/src/Nodes/CircleMinimum.cpp
index cdf757583..8958bf64a 100644
--- a/compiler/luci/partition/src/Nodes/CircleMinimum.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMinimum.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleMinimum.test.cpp b/compiler/luci/partition/src/Nodes/CircleMinimum.test.cpp
index 2fe6b0da6..a6c86a27a 100644
--- a/compiler/luci/partition/src/Nodes/CircleMinimum.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMinimum.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleMirrorPad.cpp b/compiler/luci/partition/src/Nodes/CircleMirrorPad.cpp
index 16a24abf7..91c3cb97a 100644
--- a/compiler/luci/partition/src/Nodes/CircleMirrorPad.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMirrorPad.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleMirrorPad.test.cpp b/compiler/luci/partition/src/Nodes/CircleMirrorPad.test.cpp
index 605a126c9..b837e1012 100644
--- a/compiler/luci/partition/src/Nodes/CircleMirrorPad.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMirrorPad.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleMul.cpp b/compiler/luci/partition/src/Nodes/CircleMul.cpp
index 2cd2b4038..12e14728c 100644
--- a/compiler/luci/partition/src/Nodes/CircleMul.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMul.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleMul.test.cpp b/compiler/luci/partition/src/Nodes/CircleMul.test.cpp
index 99cf0824d..b316679f8 100644
--- a/compiler/luci/partition/src/Nodes/CircleMul.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleMul.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleNeg.cpp b/compiler/luci/partition/src/Nodes/CircleNeg.cpp
index 413ad4930..e9dcc45cd 100644
--- a/compiler/luci/partition/src/Nodes/CircleNeg.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNeg.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleNeg.test.cpp b/compiler/luci/partition/src/Nodes/CircleNeg.test.cpp
index bd74a3665..ab13c9416 100644
--- a/compiler/luci/partition/src/Nodes/CircleNeg.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNeg.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.cpp
index 63ff3f021..88d72e12f 100644
--- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.test.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.test.cpp
index 2771aef49..e796a14c3 100644
--- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.cpp
index 80e4704b9..61caa3a4c 100644
--- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp
index 5a0a8da8c..eb04f2688 100644
--- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV4Out.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.cpp
index c1f117724..3b0b755a4 100644
--- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.test.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.test.cpp
index 1f20fbb0f..c9c31b315 100644
--- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.cpp
index 69e3cc8e8..3eed260c2 100644
--- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp
index e001b0b0b..2c5822fe3 100644
--- a/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleNotEqual.cpp b/compiler/luci/partition/src/Nodes/CircleNotEqual.cpp
index c40c2a21a..29a6a43bb 100644
--- a/compiler/luci/partition/src/Nodes/CircleNotEqual.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNotEqual.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleNotEqual.test.cpp b/compiler/luci/partition/src/Nodes/CircleNotEqual.test.cpp
index 360940ca7..2983e1b27 100644
--- a/compiler/luci/partition/src/Nodes/CircleNotEqual.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleNotEqual.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleOneHot.cpp b/compiler/luci/partition/src/Nodes/CircleOneHot.cpp
index d76f49255..d172fb834 100644
--- a/compiler/luci/partition/src/Nodes/CircleOneHot.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleOneHot.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleOneHot.test.cpp b/compiler/luci/partition/src/Nodes/CircleOneHot.test.cpp
index 3c555c290..59780e424 100644
--- a/compiler/luci/partition/src/Nodes/CircleOneHot.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleOneHot.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleOutputDummy.cpp b/compiler/luci/partition/src/Nodes/CircleOutputDummy.cpp
index a033e80a8..61d7620aa 100644
--- a/compiler/luci/partition/src/Nodes/CircleOutputDummy.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleOutputDummy.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace luci
{
diff --git a/compiler/luci/partition/src/Nodes/CircleOutputExclude.cpp b/compiler/luci/partition/src/Nodes/CircleOutputExclude.cpp
index 106eb405d..36ce35077 100644
--- a/compiler/luci/partition/src/Nodes/CircleOutputExclude.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleOutputExclude.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace luci
{
diff --git a/compiler/luci/partition/src/Nodes/CirclePRelu.cpp b/compiler/luci/partition/src/Nodes/CirclePRelu.cpp
index b8a2341c8..6a2325715 100644
--- a/compiler/luci/partition/src/Nodes/CirclePRelu.cpp
+++ b/compiler/luci/partition/src/Nodes/CirclePRelu.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CirclePRelu.test.cpp b/compiler/luci/partition/src/Nodes/CirclePRelu.test.cpp
index e5bcedcf6..f2a2e2c7d 100644
--- a/compiler/luci/partition/src/Nodes/CirclePRelu.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CirclePRelu.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CirclePack.cpp b/compiler/luci/partition/src/Nodes/CirclePack.cpp
index 326881067..d4b49bfa9 100644
--- a/compiler/luci/partition/src/Nodes/CirclePack.cpp
+++ b/compiler/luci/partition/src/Nodes/CirclePack.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CirclePack.test.cpp b/compiler/luci/partition/src/Nodes/CirclePack.test.cpp
index 68c513848..665b137e8 100644
--- a/compiler/luci/partition/src/Nodes/CirclePack.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CirclePack.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CirclePad.cpp b/compiler/luci/partition/src/Nodes/CirclePad.cpp
index eb2a89c85..0a1d6f7f9 100644
--- a/compiler/luci/partition/src/Nodes/CirclePad.cpp
+++ b/compiler/luci/partition/src/Nodes/CirclePad.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CirclePad.test.cpp b/compiler/luci/partition/src/Nodes/CirclePad.test.cpp
index 24ea83fa3..72f97d6a4 100644
--- a/compiler/luci/partition/src/Nodes/CirclePad.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CirclePad.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CirclePadV2.cpp b/compiler/luci/partition/src/Nodes/CirclePadV2.cpp
index 001fecbcb..969cc271d 100644
--- a/compiler/luci/partition/src/Nodes/CirclePadV2.cpp
+++ b/compiler/luci/partition/src/Nodes/CirclePadV2.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CirclePadV2.test.cpp b/compiler/luci/partition/src/Nodes/CirclePadV2.test.cpp
index aea8e0cce..9829f6269 100644
--- a/compiler/luci/partition/src/Nodes/CirclePadV2.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CirclePadV2.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CirclePow.cpp b/compiler/luci/partition/src/Nodes/CirclePow.cpp
index fb180ee69..ce69e7402 100644
--- a/compiler/luci/partition/src/Nodes/CirclePow.cpp
+++ b/compiler/luci/partition/src/Nodes/CirclePow.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CirclePow.test.cpp b/compiler/luci/partition/src/Nodes/CirclePow.test.cpp
index 7a5be4d13..f4e49c023 100644
--- a/compiler/luci/partition/src/Nodes/CirclePow.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CirclePow.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleQuantize.cpp b/compiler/luci/partition/src/Nodes/CircleQuantize.cpp
index 340c1da42..903a94e32 100644
--- a/compiler/luci/partition/src/Nodes/CircleQuantize.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleQuantize.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleQuantize.test.cpp b/compiler/luci/partition/src/Nodes/CircleQuantize.test.cpp
index 1f348b45c..5ca1a6baa 100644
--- a/compiler/luci/partition/src/Nodes/CircleQuantize.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleQuantize.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleRange.cpp b/compiler/luci/partition/src/Nodes/CircleRange.cpp
index f295338d8..fa1a02c71 100644
--- a/compiler/luci/partition/src/Nodes/CircleRange.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRange.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleRange.test.cpp b/compiler/luci/partition/src/Nodes/CircleRange.test.cpp
index 59a95f119..b5b0c8aa8 100644
--- a/compiler/luci/partition/src/Nodes/CircleRange.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRange.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleRank.cpp b/compiler/luci/partition/src/Nodes/CircleRank.cpp
index f7cce762b..35b4764aa 100644
--- a/compiler/luci/partition/src/Nodes/CircleRank.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRank.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleRank.test.cpp b/compiler/luci/partition/src/Nodes/CircleRank.test.cpp
index 74c520bee..5a0a71a7e 100644
--- a/compiler/luci/partition/src/Nodes/CircleRank.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRank.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceAny.cpp b/compiler/luci/partition/src/Nodes/CircleReduceAny.cpp
index ed762dbc6..262e12ac1 100644
--- a/compiler/luci/partition/src/Nodes/CircleReduceAny.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReduceAny.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceAny.test.cpp b/compiler/luci/partition/src/Nodes/CircleReduceAny.test.cpp
index 792f51187..45c292073 100644
--- a/compiler/luci/partition/src/Nodes/CircleReduceAny.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReduceAny.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceMax.cpp b/compiler/luci/partition/src/Nodes/CircleReduceMax.cpp
index 09586ecee..d91c78e41 100644
--- a/compiler/luci/partition/src/Nodes/CircleReduceMax.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReduceMax.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceMax.test.cpp b/compiler/luci/partition/src/Nodes/CircleReduceMax.test.cpp
index 8fbaf653e..2ad18f339 100644
--- a/compiler/luci/partition/src/Nodes/CircleReduceMax.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReduceMax.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceMin.cpp b/compiler/luci/partition/src/Nodes/CircleReduceMin.cpp
index 105214d0b..65fca6ab3 100644
--- a/compiler/luci/partition/src/Nodes/CircleReduceMin.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReduceMin.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceMin.test.cpp b/compiler/luci/partition/src/Nodes/CircleReduceMin.test.cpp
index c37d6248f..db48f54d7 100644
--- a/compiler/luci/partition/src/Nodes/CircleReduceMin.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReduceMin.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceProd.cpp b/compiler/luci/partition/src/Nodes/CircleReduceProd.cpp
index 2fb4e3e01..daac168b2 100644
--- a/compiler/luci/partition/src/Nodes/CircleReduceProd.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReduceProd.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleReduceProd.test.cpp b/compiler/luci/partition/src/Nodes/CircleReduceProd.test.cpp
index cc1ac83ad..f5f69f0ff 100644
--- a/compiler/luci/partition/src/Nodes/CircleReduceProd.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReduceProd.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleRelu.cpp b/compiler/luci/partition/src/Nodes/CircleRelu.cpp
index d3617bdbd..63ac31ba9 100644
--- a/compiler/luci/partition/src/Nodes/CircleRelu.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRelu.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleRelu.test.cpp b/compiler/luci/partition/src/Nodes/CircleRelu.test.cpp
index ccaf5760b..ec4d10f09 100644
--- a/compiler/luci/partition/src/Nodes/CircleRelu.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRelu.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleRelu6.cpp b/compiler/luci/partition/src/Nodes/CircleRelu6.cpp
index fb9ba6f36..c2956c456 100644
--- a/compiler/luci/partition/src/Nodes/CircleRelu6.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRelu6.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleRelu6.test.cpp b/compiler/luci/partition/src/Nodes/CircleRelu6.test.cpp
index 1341b0e06..e9ecbe2e6 100644
--- a/compiler/luci/partition/src/Nodes/CircleRelu6.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRelu6.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleReluN1To1.cpp b/compiler/luci/partition/src/Nodes/CircleReluN1To1.cpp
index 476195b71..1141297da 100644
--- a/compiler/luci/partition/src/Nodes/CircleReluN1To1.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReluN1To1.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleReluN1To1.test.cpp b/compiler/luci/partition/src/Nodes/CircleReluN1To1.test.cpp
index 7dc63c6ef..ae60a97e5 100644
--- a/compiler/luci/partition/src/Nodes/CircleReluN1To1.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReluN1To1.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleReshape.cpp b/compiler/luci/partition/src/Nodes/CircleReshape.cpp
index e59670453..49f7c64a7 100644
--- a/compiler/luci/partition/src/Nodes/CircleReshape.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReshape.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleReshape.test.cpp b/compiler/luci/partition/src/Nodes/CircleReshape.test.cpp
index 73cbbdfcc..198cfa1b6 100644
--- a/compiler/luci/partition/src/Nodes/CircleReshape.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReshape.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleResizeBilinear.cpp b/compiler/luci/partition/src/Nodes/CircleResizeBilinear.cpp
index 0f504015b..41fdedf2a 100644
--- a/compiler/luci/partition/src/Nodes/CircleResizeBilinear.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleResizeBilinear.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleResizeBilinear.test.cpp b/compiler/luci/partition/src/Nodes/CircleResizeBilinear.test.cpp
index c2d8b714b..437e448a6 100644
--- a/compiler/luci/partition/src/Nodes/CircleResizeBilinear.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleResizeBilinear.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.cpp b/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.cpp
index c985b7f51..567db4961 100644
--- a/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.test.cpp b/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.test.cpp
index 9cc2e558e..5dc99a385 100644
--- a/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleResizeNearestNeighbor.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleReverseSequence.cpp b/compiler/luci/partition/src/Nodes/CircleReverseSequence.cpp
index 225d29ea5..348cdbb78 100644
--- a/compiler/luci/partition/src/Nodes/CircleReverseSequence.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReverseSequence.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleReverseSequence.test.cpp b/compiler/luci/partition/src/Nodes/CircleReverseSequence.test.cpp
index 408fc0c9c..751910326 100644
--- a/compiler/luci/partition/src/Nodes/CircleReverseSequence.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReverseSequence.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleReverseV2.cpp b/compiler/luci/partition/src/Nodes/CircleReverseV2.cpp
index d59a7de93..4b8c4a444 100644
--- a/compiler/luci/partition/src/Nodes/CircleReverseV2.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReverseV2.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleReverseV2.test.cpp b/compiler/luci/partition/src/Nodes/CircleReverseV2.test.cpp
index d41ad8e66..351c6f2c0 100644
--- a/compiler/luci/partition/src/Nodes/CircleReverseV2.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleReverseV2.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleRound.cpp b/compiler/luci/partition/src/Nodes/CircleRound.cpp
index 9170bcdd9..97d002870 100644
--- a/compiler/luci/partition/src/Nodes/CircleRound.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRound.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleRound.test.cpp b/compiler/luci/partition/src/Nodes/CircleRound.test.cpp
index fad090476..02f335dc3 100644
--- a/compiler/luci/partition/src/Nodes/CircleRound.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRound.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleRsqrt.cpp b/compiler/luci/partition/src/Nodes/CircleRsqrt.cpp
index 03e64aad0..44abd5ef7 100644
--- a/compiler/luci/partition/src/Nodes/CircleRsqrt.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRsqrt.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleRsqrt.test.cpp b/compiler/luci/partition/src/Nodes/CircleRsqrt.test.cpp
index d76b96e14..39ae1f8f3 100644
--- a/compiler/luci/partition/src/Nodes/CircleRsqrt.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleRsqrt.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSVDF.cpp b/compiler/luci/partition/src/Nodes/CircleSVDF.cpp
index f661a794c..e2b99c49d 100644
--- a/compiler/luci/partition/src/Nodes/CircleSVDF.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSVDF.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp b/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp
index 5fae5206e..af8cd5549 100644
--- a/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleScatterNd.cpp b/compiler/luci/partition/src/Nodes/CircleScatterNd.cpp
index 62912b791..88a3ecf19 100644
--- a/compiler/luci/partition/src/Nodes/CircleScatterNd.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleScatterNd.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleScatterNd.test.cpp b/compiler/luci/partition/src/Nodes/CircleScatterNd.test.cpp
index f271f8843..4ce787569 100644
--- a/compiler/luci/partition/src/Nodes/CircleScatterNd.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleScatterNd.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSegmentSum.cpp b/compiler/luci/partition/src/Nodes/CircleSegmentSum.cpp
index 5fc320a16..6540416c6 100644
--- a/compiler/luci/partition/src/Nodes/CircleSegmentSum.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSegmentSum.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSegmentSum.test.cpp b/compiler/luci/partition/src/Nodes/CircleSegmentSum.test.cpp
index a6bcff20a..453b7cc01 100644
--- a/compiler/luci/partition/src/Nodes/CircleSegmentSum.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSegmentSum.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSelect.cpp b/compiler/luci/partition/src/Nodes/CircleSelect.cpp
index dbe1dd48f..436e95609 100644
--- a/compiler/luci/partition/src/Nodes/CircleSelect.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSelect.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSelect.test.cpp b/compiler/luci/partition/src/Nodes/CircleSelect.test.cpp
index 912934b8b..2a38de593 100644
--- a/compiler/luci/partition/src/Nodes/CircleSelect.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSelect.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSelectV2.cpp b/compiler/luci/partition/src/Nodes/CircleSelectV2.cpp
index 28072c860..a8b6ab556 100644
--- a/compiler/luci/partition/src/Nodes/CircleSelectV2.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSelectV2.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSelectV2.test.cpp b/compiler/luci/partition/src/Nodes/CircleSelectV2.test.cpp
index e8d128e93..c2ebdbe11 100644
--- a/compiler/luci/partition/src/Nodes/CircleSelectV2.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSelectV2.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleShape.cpp b/compiler/luci/partition/src/Nodes/CircleShape.cpp
index f93cf1458..2fb3dcdd8 100644
--- a/compiler/luci/partition/src/Nodes/CircleShape.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleShape.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleShape.test.cpp b/compiler/luci/partition/src/Nodes/CircleShape.test.cpp
index 9b4afdcc2..38033a3bc 100644
--- a/compiler/luci/partition/src/Nodes/CircleShape.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleShape.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSin.cpp b/compiler/luci/partition/src/Nodes/CircleSin.cpp
index 62c776ef6..0ef605994 100644
--- a/compiler/luci/partition/src/Nodes/CircleSin.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSin.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSin.test.cpp b/compiler/luci/partition/src/Nodes/CircleSin.test.cpp
index fbee6f662..e141b4530 100644
--- a/compiler/luci/partition/src/Nodes/CircleSin.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSin.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSlice.cpp b/compiler/luci/partition/src/Nodes/CircleSlice.cpp
index 7895d9ece..811d81f9e 100644
--- a/compiler/luci/partition/src/Nodes/CircleSlice.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSlice.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSlice.test.cpp b/compiler/luci/partition/src/Nodes/CircleSlice.test.cpp
index 3c666ad6c..0718c7f15 100644
--- a/compiler/luci/partition/src/Nodes/CircleSlice.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSlice.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSoftmax.cpp b/compiler/luci/partition/src/Nodes/CircleSoftmax.cpp
index 0a93787e7..6b08f005e 100644
--- a/compiler/luci/partition/src/Nodes/CircleSoftmax.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSoftmax.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSoftmax.test.cpp b/compiler/luci/partition/src/Nodes/CircleSoftmax.test.cpp
index b25629863..571ad80ff 100644
--- a/compiler/luci/partition/src/Nodes/CircleSoftmax.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSoftmax.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.cpp b/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.cpp
index b94948bee..dc48b36d6 100644
--- a/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.test.cpp b/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.test.cpp
index 279e9b232..0fcf22fd0 100644
--- a/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSpaceToBatchND.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.cpp b/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.cpp
index bd4523ca8..55d562f3d 100644
--- a/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.test.cpp b/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.test.cpp
index 207163d08..771c1f372 100644
--- a/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSpaceToDepth.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSparseToDense.cpp b/compiler/luci/partition/src/Nodes/CircleSparseToDense.cpp
index d1ed18818..cc2f5e915 100644
--- a/compiler/luci/partition/src/Nodes/CircleSparseToDense.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSparseToDense.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSparseToDense.test.cpp b/compiler/luci/partition/src/Nodes/CircleSparseToDense.test.cpp
index 2257186e8..06b3814ee 100644
--- a/compiler/luci/partition/src/Nodes/CircleSparseToDense.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSparseToDense.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSplit.cpp b/compiler/luci/partition/src/Nodes/CircleSplit.cpp
index d6d62a8ed..5f851f049 100644
--- a/compiler/luci/partition/src/Nodes/CircleSplit.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSplit.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSplit.test.cpp b/compiler/luci/partition/src/Nodes/CircleSplit.test.cpp
index d8d0953e0..a4242b9ab 100644
--- a/compiler/luci/partition/src/Nodes/CircleSplit.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSplit.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSplitOut.cpp b/compiler/luci/partition/src/Nodes/CircleSplitOut.cpp
index 4021f2042..1a447581e 100644
--- a/compiler/luci/partition/src/Nodes/CircleSplitOut.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSplitOut.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSplitOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleSplitOut.test.cpp
index 85fe2685b..b7cf6fc7d 100644
--- a/compiler/luci/partition/src/Nodes/CircleSplitOut.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSplitOut.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSplitV.cpp b/compiler/luci/partition/src/Nodes/CircleSplitV.cpp
index f13205725..43ebe076f 100644
--- a/compiler/luci/partition/src/Nodes/CircleSplitV.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSplitV.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSplitV.test.cpp b/compiler/luci/partition/src/Nodes/CircleSplitV.test.cpp
index 3ac1d6c27..877a44759 100644
--- a/compiler/luci/partition/src/Nodes/CircleSplitV.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSplitV.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSplitVOut.cpp b/compiler/luci/partition/src/Nodes/CircleSplitVOut.cpp
index 2034805cd..4bac6c5dc 100644
--- a/compiler/luci/partition/src/Nodes/CircleSplitVOut.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSplitVOut.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSplitVOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleSplitVOut.test.cpp
index 434dfb0ad..b3cf4d939 100644
--- a/compiler/luci/partition/src/Nodes/CircleSplitVOut.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSplitVOut.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSqrt.cpp b/compiler/luci/partition/src/Nodes/CircleSqrt.cpp
index f737aac8d..fd6d0ec05 100644
--- a/compiler/luci/partition/src/Nodes/CircleSqrt.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSqrt.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSqrt.test.cpp b/compiler/luci/partition/src/Nodes/CircleSqrt.test.cpp
index fa7f7fe2a..be298835e 100644
--- a/compiler/luci/partition/src/Nodes/CircleSqrt.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSqrt.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSquare.cpp b/compiler/luci/partition/src/Nodes/CircleSquare.cpp
index 1476a8694..56dd5440d 100644
--- a/compiler/luci/partition/src/Nodes/CircleSquare.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSquare.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSquare.test.cpp b/compiler/luci/partition/src/Nodes/CircleSquare.test.cpp
index bb6a7c33f..a509b31b5 100644
--- a/compiler/luci/partition/src/Nodes/CircleSquare.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSquare.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSquaredDifference.cpp b/compiler/luci/partition/src/Nodes/CircleSquaredDifference.cpp
index 40dd31706..e47be2c7e 100644
--- a/compiler/luci/partition/src/Nodes/CircleSquaredDifference.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSquaredDifference.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSquaredDifference.test.cpp b/compiler/luci/partition/src/Nodes/CircleSquaredDifference.test.cpp
index 9cfe9eefb..a900f1dc3 100644
--- a/compiler/luci/partition/src/Nodes/CircleSquaredDifference.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSquaredDifference.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSqueeze.cpp b/compiler/luci/partition/src/Nodes/CircleSqueeze.cpp
index bc9fda296..ffe3c911b 100644
--- a/compiler/luci/partition/src/Nodes/CircleSqueeze.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSqueeze.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSqueeze.test.cpp b/compiler/luci/partition/src/Nodes/CircleSqueeze.test.cpp
index 1f0971043..7a6e2bf44 100644
--- a/compiler/luci/partition/src/Nodes/CircleSqueeze.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSqueeze.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleStridedSlice.cpp b/compiler/luci/partition/src/Nodes/CircleStridedSlice.cpp
index 3bdca8a8a..953b45107 100644
--- a/compiler/luci/partition/src/Nodes/CircleStridedSlice.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleStridedSlice.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleStridedSlice.test.cpp b/compiler/luci/partition/src/Nodes/CircleStridedSlice.test.cpp
index 130ff9159..3e950fd25 100644
--- a/compiler/luci/partition/src/Nodes/CircleStridedSlice.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleStridedSlice.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSub.cpp b/compiler/luci/partition/src/Nodes/CircleSub.cpp
index 8ac294b7b..c5bea087f 100644
--- a/compiler/luci/partition/src/Nodes/CircleSub.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSub.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSub.test.cpp b/compiler/luci/partition/src/Nodes/CircleSub.test.cpp
index 7c0d83745..ca51865a7 100644
--- a/compiler/luci/partition/src/Nodes/CircleSub.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSub.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleSum.cpp b/compiler/luci/partition/src/Nodes/CircleSum.cpp
index bef1d4676..e929fd090 100644
--- a/compiler/luci/partition/src/Nodes/CircleSum.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSum.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleSum.test.cpp b/compiler/luci/partition/src/Nodes/CircleSum.test.cpp
index 1ed65c04f..21f6bbb74 100644
--- a/compiler/luci/partition/src/Nodes/CircleSum.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleSum.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleTanh.cpp b/compiler/luci/partition/src/Nodes/CircleTanh.cpp
index e6c56ebf7..ef5c2c993 100644
--- a/compiler/luci/partition/src/Nodes/CircleTanh.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTanh.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleTanh.test.cpp b/compiler/luci/partition/src/Nodes/CircleTanh.test.cpp
index 17cd48731..1e2d0629c 100644
--- a/compiler/luci/partition/src/Nodes/CircleTanh.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTanh.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleTile.cpp b/compiler/luci/partition/src/Nodes/CircleTile.cpp
index 0381b4dac..0c217436e 100644
--- a/compiler/luci/partition/src/Nodes/CircleTile.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTile.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleTile.test.cpp b/compiler/luci/partition/src/Nodes/CircleTile.test.cpp
index 79d1ba16c..9449c1fa7 100644
--- a/compiler/luci/partition/src/Nodes/CircleTile.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTile.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleTopKV2.cpp b/compiler/luci/partition/src/Nodes/CircleTopKV2.cpp
index ce8a6f5df..41dfa9c22 100644
--- a/compiler/luci/partition/src/Nodes/CircleTopKV2.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTopKV2.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleTopKV2.test.cpp b/compiler/luci/partition/src/Nodes/CircleTopKV2.test.cpp
index f08f3f315..e0c4a3a84 100644
--- a/compiler/luci/partition/src/Nodes/CircleTopKV2.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTopKV2.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleTopKV2Out.cpp b/compiler/luci/partition/src/Nodes/CircleTopKV2Out.cpp
index 6ca6e3d29..19f0fa7bf 100644
--- a/compiler/luci/partition/src/Nodes/CircleTopKV2Out.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTopKV2Out.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleTopKV2Out.test.cpp b/compiler/luci/partition/src/Nodes/CircleTopKV2Out.test.cpp
index a5c1c43f7..ba085f6a9 100644
--- a/compiler/luci/partition/src/Nodes/CircleTopKV2Out.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTopKV2Out.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleTranspose.cpp b/compiler/luci/partition/src/Nodes/CircleTranspose.cpp
index 1cbb54666..cbbdb0090 100644
--- a/compiler/luci/partition/src/Nodes/CircleTranspose.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTranspose.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleTranspose.test.cpp b/compiler/luci/partition/src/Nodes/CircleTranspose.test.cpp
index b3b16307c..847683844 100644
--- a/compiler/luci/partition/src/Nodes/CircleTranspose.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTranspose.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleTransposeConv.cpp b/compiler/luci/partition/src/Nodes/CircleTransposeConv.cpp
index 469cc9a1a..6b6819d59 100644
--- a/compiler/luci/partition/src/Nodes/CircleTransposeConv.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTransposeConv.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleTransposeConv.test.cpp b/compiler/luci/partition/src/Nodes/CircleTransposeConv.test.cpp
index ee9fb0e78..68adaad81 100644
--- a/compiler/luci/partition/src/Nodes/CircleTransposeConv.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleTransposeConv.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp b/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp
index 3f0374aac..332301455 100644
--- a/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp b/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp
index aeefef093..2630461ae 100644
--- a/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleUnidirectionalSequenceLSTM.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleUnique.cpp b/compiler/luci/partition/src/Nodes/CircleUnique.cpp
index 79ca59466..c035b7ed7 100644
--- a/compiler/luci/partition/src/Nodes/CircleUnique.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleUnique.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleUnique.test.cpp b/compiler/luci/partition/src/Nodes/CircleUnique.test.cpp
index 23f299840..910087a8b 100644
--- a/compiler/luci/partition/src/Nodes/CircleUnique.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleUnique.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleUniqueOut.cpp b/compiler/luci/partition/src/Nodes/CircleUniqueOut.cpp
index f244dd6eb..23b1abaa5 100644
--- a/compiler/luci/partition/src/Nodes/CircleUniqueOut.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleUniqueOut.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleUniqueOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleUniqueOut.test.cpp
index 887640790..954957497 100644
--- a/compiler/luci/partition/src/Nodes/CircleUniqueOut.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleUniqueOut.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleUnpack.cpp b/compiler/luci/partition/src/Nodes/CircleUnpack.cpp
index f83c5d810..43ebcb418 100644
--- a/compiler/luci/partition/src/Nodes/CircleUnpack.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleUnpack.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleUnpack.test.cpp b/compiler/luci/partition/src/Nodes/CircleUnpack.test.cpp
index b164cc3bc..444b04373 100644
--- a/compiler/luci/partition/src/Nodes/CircleUnpack.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleUnpack.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleUnpackOut.cpp b/compiler/luci/partition/src/Nodes/CircleUnpackOut.cpp
index b8982fff5..ee1de153f 100644
--- a/compiler/luci/partition/src/Nodes/CircleUnpackOut.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleUnpackOut.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleUnpackOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleUnpackOut.test.cpp
index 9ed440966..2aaef8d04 100644
--- a/compiler/luci/partition/src/Nodes/CircleUnpackOut.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleUnpackOut.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleVariable.cpp b/compiler/luci/partition/src/Nodes/CircleVariable.cpp
index f7f6f21fd..e7a794a16 100644
--- a/compiler/luci/partition/src/Nodes/CircleVariable.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleVariable.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace luci
{
diff --git a/compiler/luci/partition/src/Nodes/CircleWhere.cpp b/compiler/luci/partition/src/Nodes/CircleWhere.cpp
index 8ef274268..d0fc8465d 100644
--- a/compiler/luci/partition/src/Nodes/CircleWhere.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleWhere.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleWhere.test.cpp b/compiler/luci/partition/src/Nodes/CircleWhere.test.cpp
index 942f804c2..f17131c94 100644
--- a/compiler/luci/partition/src/Nodes/CircleWhere.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleWhere.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleWhile.cpp b/compiler/luci/partition/src/Nodes/CircleWhile.cpp
index 7820aca01..95b77f753 100644
--- a/compiler/luci/partition/src/Nodes/CircleWhile.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleWhile.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleWhile.test.cpp b/compiler/luci/partition/src/Nodes/CircleWhile.test.cpp
index bffb7869d..6ee7aba62 100644
--- a/compiler/luci/partition/src/Nodes/CircleWhile.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleWhile.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleWhileOut.cpp b/compiler/luci/partition/src/Nodes/CircleWhileOut.cpp
index 1cb4419db..5cd68355c 100644
--- a/compiler/luci/partition/src/Nodes/CircleWhileOut.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleWhileOut.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleWhileOut.test.cpp b/compiler/luci/partition/src/Nodes/CircleWhileOut.test.cpp
index 901f31b01..f58eba031 100644
--- a/compiler/luci/partition/src/Nodes/CircleWhileOut.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleWhileOut.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/Nodes/CircleZerosLike.cpp b/compiler/luci/partition/src/Nodes/CircleZerosLike.cpp
index 715042d86..795d88de3 100644
--- a/compiler/luci/partition/src/Nodes/CircleZerosLike.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleZerosLike.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
namespace
{
diff --git a/compiler/luci/partition/src/Nodes/CircleZerosLike.test.cpp b/compiler/luci/partition/src/Nodes/CircleZerosLike.test.cpp
index 74c873cb2..f887bc36f 100644
--- a/compiler/luci/partition/src/Nodes/CircleZerosLike.test.cpp
+++ b/compiler/luci/partition/src/Nodes/CircleZerosLike.test.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "ConnectNode.test.h"
diff --git a/compiler/luci/partition/src/PartitionIR.cpp b/compiler/luci/partition/src/PartitionIR.cpp
index 60dc74f89..969fa7092 100644
--- a/compiler/luci/partition/src/PartitionIR.cpp
+++ b/compiler/luci/partition/src/PartitionIR.cpp
@@ -64,7 +64,7 @@ std::unique_ptr<PGroups> PGroups::make_copy(void) const
// note: d_pgroup is now nullptr as it's moved
}
- return std::move(d_pgroups);
+ return d_pgroups;
}
GroupKey PGroups::group_of(luci::CircleNode *node) const
diff --git a/compiler/luci/partition/src/PartitionMerge.cpp b/compiler/luci/partition/src/PartitionMerge.cpp
index 4c3971bd8..aa8a827cd 100644
--- a/compiler/luci/partition/src/PartitionMerge.cpp
+++ b/compiler/luci/partition/src/PartitionMerge.cpp
@@ -255,7 +255,7 @@ std::unique_ptr<luci::PGroups> merge_pgroups(const luci::PGroups *s_pgroups)
}
} while (changed);
- return std::move(d_pgroups);
+ return d_pgroups;
}
} // namespace luci
diff --git a/compiler/luci/partition/src/PartitionPGroups.cpp b/compiler/luci/partition/src/PartitionPGroups.cpp
index eaeacf9c4..2e95f08f7 100644
--- a/compiler/luci/partition/src/PartitionPGroups.cpp
+++ b/compiler/luci/partition/src/PartitionPGroups.cpp
@@ -257,7 +257,7 @@ std::unique_ptr<luci::PGroups> produce_pgroups(const luci::Module *source,
}
}
- return std::move(pgroups);
+ return pgroups;
}
} // namespace luci
diff --git a/compiler/luci/partition/src/PartitionPModules.cpp b/compiler/luci/partition/src/PartitionPModules.cpp
index beaaf6093..251dbea39 100644
--- a/compiler/luci/partition/src/PartitionPModules.cpp
+++ b/compiler/luci/partition/src/PartitionPModules.cpp
@@ -15,7 +15,7 @@
*/
#include "PartitionPModules.h"
-#include "ConnectNode.h"
+#include "luci/ConnectNode.h"
#include "luci/Service/CircleNodeClone.h"
#include "luci/Log.h"
@@ -156,7 +156,7 @@ std::unique_ptr<loco::Graph> clone_graph(loco::Graph *graph_org, luci::CloneCont
add_graph_output(graph_clone, output_clone);
}
- return std::move(graph);
+ return graph;
}
void clone_recursive_subgraphs(luci::PartedModule &pm, loco::Graph *graph,
diff --git a/compiler/luci/pass/CMakeLists.txt b/compiler/luci/pass/CMakeLists.txt
index 5237c6d3f..d9d004db9 100644
--- a/compiler/luci/pass/CMakeLists.txt
+++ b/compiler/luci/pass/CMakeLists.txt
@@ -1,9 +1,16 @@
nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
+nnas_find_package(Fp16Source QUIET)
+
if(NOT FlatBuffers_FOUND)
message(STATUS "FlatBuffers NOT FOUND")
return()
endif(NOT FlatBuffers_FOUND)
+if(NOT Fp16Source_FOUND)
+ message(STATUS "Fp16Source NOT FOUND")
+ return()
+endif(NOT Fp16Source_FOUND)
+
file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
@@ -14,6 +21,7 @@ endif(NOT LUCI_LIBRARY_TYPE)
add_library(luci_pass ${LUCI_LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_pass PRIVATE src)
+target_include_directories(luci_pass PRIVATE ${Fp16Source_DIR}/include)
target_include_directories(luci_pass PUBLIC include)
target_link_libraries(luci_pass PUBLIC loco)
target_link_libraries(luci_pass PUBLIC logo_core)
diff --git a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h
index c803898f6..b94822c35 100644
--- a/compiler/luci/pass/include/luci/CircleOptimizer.h
+++ b/compiler/luci/pass/include/luci/CircleOptimizer.h
@@ -47,8 +47,10 @@ public:
ResolveCustomOpBatchMatMul,
ResolveCustomOpMatMul,
ResolveCustomOpMaxPoolWithArgmax,
+ ResolveCustomOpSplitV,
FoldAddV2,
FoldCast,
+ FoldDensify,
FoldDepthwiseConv2D,
FoldDequantize,
FoldGather,
@@ -61,6 +63,7 @@ public:
ShuffleWeightTo16x1Float32,
RemoveRedundantTranspose,
ReplaceMulAddWithDepthwiseConv,
+ ReplaceNonConstFCWithBatchMatMul,
ReplaceSubWithAdd,
SubstitutePackToReshape,
SubstitutePadV2ToPad,
diff --git a/compiler/luci/pass/include/luci/Pass/FoldDensifyPass.h b/compiler/luci/pass/include/luci/Pass/FoldDensifyPass.h
new file mode 100644
index 000000000..8ec81b1d4
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FoldDensifyPass.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_DENSIFY_PASS_H__
+#define __LUCI_FOLD_DENSIFY_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fold a Densify operation when its input is a sparse constant
+ *
+ */
+struct FoldDensifyPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FoldDensifyPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_DENSIFY_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/RemoveRedundantDequantizePass.h b/compiler/luci/pass/include/luci/Pass/RemoveRedundantDequantizePass.h
new file mode 100644
index 000000000..2deb75297
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/RemoveRedundantDequantizePass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_REDUNDANT_DEQUANTIZE_PASS_H__
+#define __LUCI_REMOVE_REDUNDANT_DEQUANTIZE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove redundant dequantize operations
+ */
+struct RemoveRedundantDequantizePass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveRedundantDequantizePass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_REDUNDANT_DEQUANTIZE_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryReshapeNetPass.h b/compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryReshapeNetPass.h
new file mode 100644
index 000000000..19948a31c
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/RemoveUnnecessaryReshapeNetPass.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_UNNECESSARY_RESHAPE_NET_PASS_H__
+#define __LUCI_REMOVE_UNNECESSARY_RESHAPE_NET_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove unnecessary Reshape nodes.
+ * @details This class will remove unnecessary pre/post-Reshape nodes.
+ * See https://github.com/Samsung/ONE/issues/9600 for more details.
+ */
+struct RemoveUnnecessaryReshapeNetPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::RemoveUnnecessaryReshapeNetPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_UNNECESSARY_RESHAPE_NET_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h b/compiler/luci/pass/include/luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h
new file mode 100644
index 000000000..24e16ec49
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REPLACE_NONCONST_FC_WITH_BATCH_MATMUL_PASS_H__
+#define __LUCI_REPLACE_NONCONST_FC_WITH_BATCH_MATMUL_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to replace "FC with non-const weight" with Batched MatMul
+ */
+struct ReplaceNonConstFCWithBatchMatMulPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::ReplaceNonConstFCWithBatchMatMulPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REPLACE_NONCONST_FC_WITH_BATCH_MATMUL_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/ResolveCustomOpSplitVPass.h b/compiler/luci/pass/include/luci/Pass/ResolveCustomOpSplitVPass.h
new file mode 100644
index 000000000..d4f0147e8
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/ResolveCustomOpSplitVPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_RESOLVE_CUSTOM_OP_SPLIT_V_PASS_H__
+#define __LUCI_RESOLVE_CUSTOM_OP_SPLIT_V_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to resolve certain custom op of subgraph into splitv op in circle schema.
+ */
+struct ResolveCustomOpSplitVPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::ResolveCustomOpSplitVPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_RESOLVE_CUSTOM_OP_SPLIT_V_PASS_H__
diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp
index 6dbb22d7c..74c569d20 100644
--- a/compiler/luci/pass/src/CircleOptimizer.cpp
+++ b/compiler/luci/pass/src/CircleOptimizer.cpp
@@ -20,6 +20,7 @@
#include "luci/Pass/ExpandBroadcastConstPass.h"
#include "luci/Pass/FoldAddV2Pass.h"
#include "luci/Pass/FoldCastPass.h"
+#include "luci/Pass/FoldDensifyPass.h"
#include "luci/Pass/FoldDepthwiseConv2DPass.h"
#include "luci/Pass/FoldDequantizePass.h"
#include "luci/Pass/FoldGatherPass.h"
@@ -43,15 +44,18 @@
#include "luci/Pass/RemoveRedundantTransposePass.h"
#include "luci/Pass/RemoveRedundantQuantizePass.h"
#include "luci/Pass/RemoveUnnecessaryReshapePass.h"
+#include "luci/Pass/RemoveUnnecessaryReshapeNetPass.h"
#include "luci/Pass/RemoveUnnecessarySlicePass.h"
#include "luci/Pass/RemoveUnnecessaryStridedSlicePass.h"
#include "luci/Pass/RemoveUnnecessarySplitPass.h"
+#include "luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h"
#include "luci/Pass/ReplaceMulAddWithDepthwiseConvPass.h"
#include "luci/Pass/ReplaceSubWithAddPass.h"
#include "luci/Pass/ResolveCustomOpAddPass.h"
#include "luci/Pass/ResolveCustomOpBatchMatMulPass.h"
#include "luci/Pass/ResolveCustomOpMatMulPass.h"
#include "luci/Pass/ResolveCustomOpMaxPoolWithArgmaxPass.h"
+#include "luci/Pass/ResolveCustomOpSplitVPass.h"
#include "luci/Pass/SparsifyTensorPass.h"
#include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h"
#include "luci/Pass/SubstitutePackToReshapePass.h"
@@ -127,7 +131,8 @@ bool OptimizeOptionsImpl::query(Algorithm algo)
return true;
}
-void convert_nchw_to_nhwc(loco::Graph *g, bool preserve_input, bool preserve_output)
+// TODO Make a struct for args
+void convert_nchw_to_nhwc(loco::Graph *g, bool preserve_input, bool preserve_output, bool fuse_fc)
{
logo::Phase phase;
@@ -135,6 +140,21 @@ void convert_nchw_to_nhwc(loco::Graph *g, bool preserve_input, bool preserve_out
phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
+ // Resolve custom Ops
+ phase.emplace_back(std::make_unique<luci::ResolveCustomOpAddPass>());
+ phase.emplace_back(std::make_unique<luci::ResolveCustomOpBatchMatMulPass>());
+ phase.emplace_back(std::make_unique<luci::ResolveCustomOpMatMulPass>());
+ phase.emplace_back(std::make_unique<luci::ResolveCustomOpMaxPoolWithArgmaxPass>());
+ phase.emplace_back(std::make_unique<luci::ResolveCustomOpSplitVPass>());
+
+ // Fuse FullyConnected with Add
+ // Why we perform FuseAddWithFullyConnectedPass before ConvertNCHWToNHWCPass?
+ // FullyConnected Op's layout is not changed in ConvertNCHWToNHWCPass, while
+ // Add Op's layer is changed from NCHW to NHWC.
+ // This disables fusion of Add and FullyConnected after ConvertNCHWToNHWC.
+ if (fuse_fc)
+ phase.emplace_back(std::make_unique<luci::FuseAddWithFullyConnectedPass>());
+
phase.emplace_back(
std::make_unique<luci::ConvertNCHWToNHWCPass>(preserve_input, preserve_output));
@@ -190,7 +210,9 @@ void CircleOptimizer::optimize(loco::Graph *g) const
bool preserve_output =
_options->param(Options::AlgorithmParameters::NCHW_to_NHWC_output_shape) != "true";
- convert_nchw_to_nhwc(g, preserve_input, preserve_output);
+ bool fuse_fc = _options->query(Options::Algorithm::FuseAddWithFullyConnected);
+
+ convert_nchw_to_nhwc(g, preserve_input, preserve_output, fuse_fc);
}
/* TRANSFORM DECLARATION BEGIN */
@@ -220,6 +242,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
{
phase.emplace_back(std::make_unique<luci::ResolveCustomOpMaxPoolWithArgmaxPass>());
}
+ if (_options->query(Options::Algorithm::ResolveCustomOpSplitV))
+ {
+ phase.emplace_back(std::make_unique<luci::ResolveCustomOpSplitVPass>());
+ }
if (_options->query(Options::Algorithm::FuseInstanceNorm))
{
phase.emplace_back(std::make_unique<FuseInstanceNormPass>());
@@ -260,6 +286,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
{
phase.emplace_back(std::make_unique<luci::FoldCastPass>());
}
+ if (_options->query(Options::Algorithm::FoldDensify))
+ {
+ phase.emplace_back(std::make_unique<luci::FoldDensifyPass>());
+ }
if (_options->query(Options::Algorithm::FoldDepthwiseConv2D))
{
phase.emplace_back(std::make_unique<luci::FoldDepthwiseConv2DPass>());
@@ -307,6 +337,7 @@ void CircleOptimizer::optimize(loco::Graph *g) const
if (_options->query(Options::Algorithm::RemoveUnnecessaryReshape))
{
phase.emplace_back(std::make_unique<luci::RemoveUnnecessaryReshapePass>());
+ phase.emplace_back(std::make_unique<luci::RemoveUnnecessaryReshapeNetPass>());
}
if (_options->query(Options::Algorithm::RemoveUnnecessarySlice))
{
@@ -332,6 +363,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
{
phase.emplace_back(std::make_unique<luci::RemoveRedundantQuantizePass>());
}
+ if (_options->query(Options::Algorithm::ReplaceNonConstFCWithBatchMatMul))
+ {
+ phase.emplace_back(std::make_unique<luci::ReplaceNonConstFCWithBatchMatMulPass>());
+ }
if (_options->query(Options::Algorithm::ReplaceMulAddWithDepthwiseConv))
{
phase.emplace_back(std::make_unique<luci::ReplaceMulAddWithDepthwiseConvPass>());
diff --git a/compiler/luci/pass/src/CircleQuantizer.cpp b/compiler/luci/pass/src/CircleQuantizer.cpp
index ce38a90b9..9a6550b9f 100644
--- a/compiler/luci/pass/src/CircleQuantizer.cpp
+++ b/compiler/luci/pass/src/CircleQuantizer.cpp
@@ -22,6 +22,7 @@
#include "luci/Pass/RequantizePass.h"
#include "luci/Pass/ConvertToFakeQuantizedModelPass.h"
#include "luci/Pass/FoldDequantizePass.h"
+#include "luci/Pass/RemoveRedundantDequantizePass.h"
#include "luci/Pass/QuantizePreCheckerPass.h"
#include "luci/Pass/QuantizeWithMinMaxPass.h"
#include "luci/Pass/QuantizeDequantizeWeightsPass.h"
@@ -252,8 +253,8 @@ void CircleQuantizer::quantize(loco::Graph *g) const
static const std::vector<std::string> qwmm_supported_input_model_dtype{"float32"};
static const std::vector<std::string> qwmm_supported_output_model_dtype{"uint8", "int16"};
static const std::vector<std::string> qwmm_supported_granularity{"layer", "channel"};
- static const std::vector<std::string> qwmm_supported_input_type{"uint8", "int16"};
- static const std::vector<std::string> qwmm_supported_output_type{"uint8", "int16"};
+ static const std::vector<std::string> qwmm_supported_input_type{"uint8", "int16", "float32"};
+ static const std::vector<std::string> qwmm_supported_output_type{"uint8", "int16", "float32"};
auto input_model_dtype =
_options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
@@ -434,6 +435,8 @@ void CircleQuantizer::quantize(loco::Graph *g) const
phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
+ // Remove redundant Dequantize Ops generated during fake quantization
+ phase.emplace_back(std::make_unique<luci::RemoveRedundantDequantizePass>());
// Fold Dequantize Ops generated during fake quantization
phase.emplace_back(std::make_unique<luci::FoldDequantizePass>());
diff --git a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp
index ce4f54035..55a29d105 100644
--- a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp
+++ b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp
@@ -28,6 +28,69 @@
namespace
{
+// Return true if from can be broadcasted to to
+// to's shape is [N, C, H, W]
+bool broadcastable(const luci::CircleConst *from, const luci::CircleNode *to)
+{
+ assert(to->rank() == 4); // FIX_CALLER_UNLESS
+
+ const auto from_rank = from->rank();
+ if (from_rank > 4)
+ return false;
+
+ // Scalar is always broadcastable
+ if (from_rank == 0)
+ return true;
+
+ for (uint32_t i = 1; i <= from_rank; i++)
+ {
+ auto to_index = 4 - i;
+ auto from_index = from_rank - i;
+
+ if (from->dim(from_index).value() != to->dim(to_index).value() and
+ from->dim(from_index).value() != 1)
+ return false;
+ }
+
+ return true;
+}
+
+// Expand node to rank 4
+// node should have rank less than or equal to 4
+void expand_to_rank_4(luci::CircleConst *node)
+{
+ auto original_rank = node->rank();
+
+ assert(original_rank <= 4); // FIX_CALLER_UNLESS
+
+ if (original_rank == 4)
+ return;
+
+ std::vector<uint32_t> original_shape;
+ for (uint32_t i = 0; i < original_rank; i++)
+ {
+ original_shape.emplace_back(node->dim(i).value());
+ }
+
+ node->rank(4);
+ for (uint32_t i = 0; i < (4 - original_rank); i++)
+ node->dim(i) = 1;
+
+ for (uint32_t i = 0; i < original_rank; i++)
+ node->dim(i + (4 - original_rank)) = original_shape.at(i);
+}
+
+bool is_output(const loco::Node *node)
+{
+ auto cnode = loco::must_cast<const luci::CircleNode *>(node);
+ auto opcode = cnode->opcode();
+ if (opcode == luci::CircleOpcode::CIRCLEOUTPUT ||
+ opcode == luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE)
+ return true;
+
+ return false;
+}
+
bool is_same_shape(const luci::CircleNode *node, const std::vector<loco::Dimension> &shape)
{
if (not node)
@@ -484,7 +547,7 @@ bool is_NCHW_with_s_const(const T *node, luci::CircleNode *&pred_node,
//
// Find MUL with an NCHW pattern described below
// - Input (non-constant) shape : [N, C, H, W]
-// - Input (constant) shape : [1, C, 1, 1], [N, C, H, W] or a scalar (1)
+// - Input (constant) shape : broadcastable to [N, C, H, W]
// - Output shape : [N, C, H, W]
bool is_NCHW_with_const(const luci::CircleMul *node, luci::CircleNode *&pred_node,
luci::CircleConst *&multiplier)
@@ -511,32 +574,12 @@ bool is_NCHW_with_const(const luci::CircleMul *node, luci::CircleNode *&pred_nod
if (pred_node->rank() != 4)
return false;
- const auto const_rank = multiplier->rank();
- // Support Rank 4 or scalar (rank 0 or 1)
- if (const_rank != 4 && const_rank != 0 && const_rank != 1)
+ if (not broadcastable(multiplier, node))
return false;
- const auto input_cdim = pred_node->dim(1);
- const auto output_cdim = node->dim(1);
-
- if (const_rank == 4)
- {
- bool supported_shape = false;
-
- // Check multiplier is (1, C, 1, 1)
- if (is_same_shape(multiplier, {1, node->dim(1), 1, 1}))
- supported_shape = true;
-
- // Check multiplier is (N, C, H, W)
- if (is_same_shape(multiplier, {node->dim(0), node->dim(1), node->dim(2), node->dim(3)}))
- supported_shape = true;
+ expand_to_rank_4(multiplier);
- return supported_shape;
- }
- if (input_cdim == output_cdim)
- return true;
- else
- return false;
+ return true;
}
// We assume ADD with const input is NCHW if,
@@ -569,32 +612,12 @@ bool is_NCHW_with_const(const luci::CircleAdd *node, luci::CircleNode *&pred_nod
if (pred_node->rank() != 4)
return false;
- const auto const_rank = beta->rank();
- // Support Rank 4 or scalar (rank 0 or 1)
- if (const_rank != 4 && const_rank != 0 && const_rank != 1)
+ if (not broadcastable(beta, node))
return false;
- const auto input_cdim = pred_node->dim(1);
- const auto output_cdim = node->dim(1);
-
- if (const_rank == 4)
- {
- bool supported_shape = false;
-
- // Check beta is (1, C, 1, 1)
- if (is_same_shape(beta, {1, node->dim(1), 1, 1}))
- supported_shape = true;
-
- // Check beta is (N, C, H, W)
- if (is_same_shape(beta, {node->dim(0), node->dim(1), node->dim(2), node->dim(3)}))
- supported_shape = true;
+ expand_to_rank_4(beta);
- return supported_shape;
- }
- if (input_cdim == output_cdim)
- return true;
- else
- return false;
+ return true;
}
// We assume SUB with const input is NCHW if,
@@ -675,6 +698,24 @@ template <class T> bool convert_unary_x(T *node)
return true;
}
+template <class T> bool convert_unary_logits(T *node)
+{
+ const auto pred_node = loco::must_cast<luci::CircleNode *>(node->logits());
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+ node->logits(pre_trans);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+}
+
class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool>
{
// Default
@@ -742,17 +783,14 @@ class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool>
if (is_NCHW_with_const(node, pred_node, beta))
{
+ assert(beta->rank() == 4); // FIX is_NCHW_with_const unless
+ auto nhwc_const = create_NHWC_from_NCHW(beta);
+ if (nhwc_const == nullptr)
+ return false;
+ node->y(nhwc_const);
+
auto pre_trans = create_pre_transpose(node);
pre_trans->a(pred_node);
-
- if (beta->rank() == 4)
- {
- auto nhwc_const = create_NHWC_from_NCHW(beta);
- if (nhwc_const == nullptr)
- return false;
- node->y(nhwc_const);
- }
-
node->x(pre_trans);
}
else if (beta == nullptr)
@@ -816,6 +854,11 @@ class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool>
bool visit(luci::CircleLogistic *node) { return convert_unary_x<luci::CircleLogistic>(node); }
+ bool visit(luci::CircleLogSoftmax *node)
+ {
+ return convert_unary_logits<luci::CircleLogSoftmax>(node);
+ }
+
bool visit(luci::CircleMaximum *node)
{
luci::CircleNode *pred_node = nullptr;
@@ -954,15 +997,15 @@ class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool>
if (is_NCHW_with_const(node, pred_node, multiplier))
{
+ assert(multiplier->rank() == 4); // FIX is_NCHW_with_const unless
+ auto nhwc_const = create_NHWC_from_NCHW(multiplier);
+ if (nhwc_const == nullptr)
+ return false;
+ node->y(nhwc_const);
+
auto pre_trans = create_pre_transpose(node);
pre_trans->a(pred_node);
node->x(pre_trans);
-
- if (multiplier->rank() == 4)
- {
- auto nhwc_const = create_NHWC_from_NCHW(multiplier);
- node->y(nhwc_const);
- }
}
else if (multiplier == nullptr)
{
@@ -1049,12 +1092,127 @@ class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool>
return true;
}
+ // TODO Reduce duplicate code with CircleMean
+ bool visit(luci::CircleReduceMax *node)
+ {
+ auto input = loco::must_cast<luci::CircleNode *>(node->input());
+ if (input->rank() != 4)
+ return false;
+
+ auto rindices = dynamic_cast<luci::CircleConst *>(node->reduction_indices());
+ if (not rindices)
+ return false;
+
+ auto nhwc_rindices = create_NHWC_rindices(rindices);
+ if (not nhwc_rindices)
+ return false;
+
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(input);
+ node->input(pre_trans);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ node->reduction_indices(nhwc_rindices);
+
+ if (node->keep_dims())
+ {
+ auto post_trans = create_post_transpose(node);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+ }
+
+ // The below codes handle the cases where node->keep_dims() == false
+ // 1D output never needs a transpose
+ if (node->rank() <= 1)
+ return true;
+
+ std::vector<bool> reduced_dims_nhwc(4, false);
+ uint32_t num_reduced_indices = nhwc_rindices->size<loco::DataType::S32>();
+
+ for (uint32_t ri = 0; ri < num_reduced_indices; ++ri)
+ {
+ reduced_dims_nhwc[nhwc_rindices->at<loco::DataType::S32>(ri)] = true;
+ }
+
+ // if channel dimension has been reduced, we don't need a transpose
+ if (reduced_dims_nhwc[3])
+ return true;
+
+ // likewise, if both space dimensions are reduced, no transpose is needed
+ if (reduced_dims_nhwc[1] && reduced_dims_nhwc[2])
+ return true;
+
+ std::vector<int32_t> post_trans_ind;
+ // case 1: only N is reduced
+ if (num_reduced_indices == 1 && reduced_dims_nhwc[0])
+ post_trans_ind = {2, 0, 1};
+
+ // case 2: only H or W is reduced
+ if (num_reduced_indices == 1 && (reduced_dims_nhwc[1] || reduced_dims_nhwc[2]))
+ post_trans_ind = {0, 2, 1};
+
+ // case 3: N and either H or W are reduced
+ if (num_reduced_indices == 2)
+ post_trans_ind = {1, 0};
+
+ auto post_trans = create_Nd_transpose(node, post_trans_ind);
+ loco::replace(node).with(post_trans);
+
+ post_trans->a(node);
+
+ return true;
+ }
+
bool visit(luci::CircleRelu *node) { return convert_unary_features<luci::CircleRelu>(node); }
bool visit(luci::CircleRelu6 *node) { return convert_unary_features<luci::CircleRelu6>(node); }
bool visit(luci::CircleRsqrt *node) { return convert_unary_x<luci::CircleRsqrt>(node); }
+ bool visit(luci::CircleSoftmax *node) { return convert_unary_logits<luci::CircleSoftmax>(node); }
+
+ bool visit(luci::CircleSplitV *node)
+ {
+ // Change split dimension
+ auto axis = dynamic_cast<luci::CircleConst *>(node->split_dim());
+ if (not axis)
+ return false;
+
+ if (axis->dtype() != loco::DataType::S32)
+ return false;
+
+ if (axis->size<loco::DataType::S32>() != 1)
+ return false;
+
+ axis->at<loco::DataType::S32>(0) = nchw_axis_to_nhwc(axis->at<loco::DataType::S32>(0));
+
+ // Insert pre-transpose
+ const auto pred_node = loco::must_cast<luci::CircleNode *>(node->input());
+ auto pre_trans = create_pre_transpose(node);
+ pre_trans->a(pred_node);
+ node->input(pre_trans);
+
+ // Do shape inference for this node again.
+ node->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ // Insert post-transposes
+ for (auto succ : loco::succs(node))
+ {
+ auto svo = loco::must_cast<luci::CircleSplitVOut *>(succ);
+
+ auto post_trans = create_post_transpose(svo);
+ loco::replace(svo).with(post_trans);
+ post_trans->a(svo);
+ }
+
+ return true;
+ }
+
bool visit(luci::CircleSquaredDifference *node)
{
// TODO support CircleConst input
@@ -1195,6 +1353,8 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g)
// pre-Transpose --- [intermediate Ops] --- post-Transpose
// |
// +--[intermediate Ops] --- post-Transpose
+ //
+ // NOTE Intermediate Ops SHOULD NOT contain pre-Transpose/Reshape
for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
{
if (has_data_format(node))
@@ -1202,25 +1362,51 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g)
if (is_pre_transpose(node) || is_pre_reshape(node))
{
+ std::set<loco::Node *> intermediate;
+
+ // Variable to check intermediate Ops contain pre-Transpose/Reshape
+ bool has_pre = false;
+
+ // Variable to check the pattern is closed with post-Transpose/Reshape
+ bool is_closed = true;
+
// For recursive call of lambda
- std::function<void(loco::Node *)> set_data_format_to_succs;
- set_data_format_to_succs = [&](loco::Node *n) {
+ std::function<void(loco::Node *)> collect_intermediate;
+ collect_intermediate = [&](loco::Node *n) {
for (auto succ : loco::succs(n))
{
// Exit condition
if (is_post_transpose(succ) || is_post_reshape(succ))
continue;
- if (not has_data_format(succ))
+ if (is_pre_transpose(succ) || is_pre_reshape(succ))
+ {
+ has_pre = true;
+ break;
+ }
+
+ if (is_output(succ))
{
- set_data_format(succ, DataFormat::NHWC);
+ is_closed = false;
+ break;
}
- set_data_format_to_succs(succ);
+ intermediate.emplace(succ);
+
+ collect_intermediate(succ);
}
};
- set_data_format_to_succs(node);
+ collect_intermediate(node);
+
+ if (has_pre or not is_closed)
+ continue;
+
+ for (auto inter : intermediate)
+ {
+ if (not has_data_format(inter))
+ set_data_format(inter, DataFormat::NHWC);
+ }
}
}
@@ -1248,6 +1434,7 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g)
case luci::CircleOpcode::ELU:
case luci::CircleOpcode::LEAKY_RELU:
case luci::CircleOpcode::LOGISTIC:
+ case luci::CircleOpcode::LOG_SOFTMAX:
case luci::CircleOpcode::MAXIMUM:
case luci::CircleOpcode::MEAN:
case luci::CircleOpcode::MINIMUM:
@@ -1255,9 +1442,12 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g)
case luci::CircleOpcode::NEG:
case luci::CircleOpcode::PAD:
case luci::CircleOpcode::PADV2:
+ case luci::CircleOpcode::REDUCE_MAX:
case luci::CircleOpcode::RELU:
case luci::CircleOpcode::RELU6:
case luci::CircleOpcode::RSQRT:
+ case luci::CircleOpcode::SOFTMAX:
+ case luci::CircleOpcode::SPLIT_V:
case luci::CircleOpcode::SQUARED_DIFFERENCE:
case luci::CircleOpcode::SUB:
if (!has_data_format(node))
@@ -1296,7 +1486,8 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g)
if (circle_node->rank() != 4)
{
// TODO replace the check above with the input rank check, and remove the condition below
- if (not dynamic_cast<luci::CircleMean *>(node))
+ if (not dynamic_cast<luci::CircleMean *>(node) and
+ not dynamic_cast<luci::CircleReduceMax *>(node))
continue;
}
diff --git a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp
index dd81d1380..6bb3d3268 100644
--- a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp
+++ b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp
@@ -16,6 +16,8 @@
#include <logo/Phase.h>
+#include <luci/test/TestIOGraph.h>
+
#include "luci/Pass/ConvertNCHWToNHWCPass.h"
#include "luci/Pass/CircleShapeInferencePass.h"
@@ -23,6 +25,8 @@
#include <gtest/gtest.h>
+using namespace luci::test;
+
namespace
{
@@ -202,6 +206,173 @@ public:
luci::CircleConst *post_shape = nullptr;
};
+/**
+ * Graph with pre-Reshape but no post-Transpose/Reshape.
+ *
+ * BEFORE
+ * [Input]
+ * |
+ * [Pre-Reshape]
+ * |
+ * [Relu]
+ * |
+ * [Output]
+ *
+ * AFTER
+ * [Input]
+ * |
+ * [Pre-Reshape]
+ * |
+ * [Pre-Transpose]
+ * |
+ * [Relu]
+ * |
+ * [Post-Transpose]
+ * |
+ * [Output]
+ */
+class NoPostReshapeGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ relu = g.nodes()->create<luci::CircleRelu>();
+ pre_reshape = g.nodes()->create<luci::CircleReshape>();
+ pre_shape = g.nodes()->create<luci::CircleConst>();
+
+ pre_shape->dtype(loco::DataType::S32);
+
+ uint32_t channel_size = 16;
+ auto in = loco::must_cast<luci::CircleNode *>(input);
+ in->shape({1, channel_size, 4, 4});
+ pre_shape->shape({4});
+
+ pre_shape->size<loco::DataType::S32>(4);
+ pre_shape->at<loco::DataType::S32>(0) = 1;
+ pre_shape->at<loco::DataType::S32>(1) = 4;
+ pre_shape->at<loco::DataType::S32>(2) = 4;
+ pre_shape->at<loco::DataType::S32>(3) = channel_size;
+
+ pre_reshape->tensor(input);
+ pre_reshape->shape(pre_shape);
+ relu->features(pre_reshape);
+
+ relu->name("Relu");
+ pre_reshape->name("pre-reshape");
+
+ return relu;
+ }
+
+public:
+ luci::CircleRelu *relu = nullptr;
+ luci::CircleReshape *pre_reshape = nullptr;
+ luci::CircleConst *pre_shape = nullptr;
+};
+
+/**
+ * Graph with two pre-Reshapes
+ *
+ * BEFORE
+ * [Input]
+ * |
+ * [Pre-Reshape]
+ * |
+ * [Relu]
+ * |
+ * [Pre-Reshape]
+ * |
+ * [Post-Reshape]
+ * |
+ * [Output]
+ *
+ * AFTER
+ * [Input]
+ * |
+ * [Pre-Reshape]
+ * |
+ * [Pre-Transpose]
+ * |
+ * [Relu]
+ * |
+ * [Post-Transpose]
+ * |
+ * [Pre-Reshape]
+ * |
+ * [Post-Reshape]
+ * |
+ * [Output]
+ */
+class ReluNotClosedGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ relu = g.nodes()->create<luci::CircleRelu>();
+ pre_reshape = g.nodes()->create<luci::CircleReshape>();
+ pre_reshape_2 = g.nodes()->create<luci::CircleReshape>();
+ post_reshape = g.nodes()->create<luci::CircleReshape>();
+ pre_shape = g.nodes()->create<luci::CircleConst>();
+ pre_shape_2 = g.nodes()->create<luci::CircleConst>();
+ post_shape = g.nodes()->create<luci::CircleConst>();
+
+ pre_shape->dtype(loco::DataType::S32);
+ pre_shape_2->dtype(loco::DataType::S32);
+ post_shape->dtype(loco::DataType::S32);
+
+ uint32_t channel_size = 16;
+ auto in = loco::must_cast<luci::CircleNode *>(input);
+ in->shape({1, channel_size, 4, 4});
+ pre_shape->shape({4});
+ pre_shape_2->shape({4});
+ post_shape->shape({4});
+
+ pre_shape->size<loco::DataType::S32>(4);
+ pre_shape->at<loco::DataType::S32>(0) = 1;
+ pre_shape->at<loco::DataType::S32>(1) = 4;
+ pre_shape->at<loco::DataType::S32>(2) = 4;
+ pre_shape->at<loco::DataType::S32>(3) = channel_size;
+
+ pre_shape_2->size<loco::DataType::S32>(4);
+ pre_shape_2->at<loco::DataType::S32>(0) = 1;
+ pre_shape_2->at<loco::DataType::S32>(1) = 4;
+ pre_shape_2->at<loco::DataType::S32>(2) = channel_size;
+ pre_shape_2->at<loco::DataType::S32>(3) = 4;
+
+ post_shape->size<loco::DataType::S32>(4);
+ post_shape->at<loco::DataType::S32>(0) = 1;
+ post_shape->at<loco::DataType::S32>(1) = 4;
+ post_shape->at<loco::DataType::S32>(2) = 4;
+ post_shape->at<loco::DataType::S32>(3) = channel_size;
+
+ pre_reshape->tensor(input);
+ pre_reshape->shape(pre_shape);
+
+ relu->features(pre_reshape);
+
+ pre_reshape_2->tensor(relu);
+ pre_reshape_2->shape(pre_shape_2);
+
+ post_reshape->tensor(pre_reshape_2);
+ post_reshape->shape(post_shape);
+
+ relu->name("Relu");
+ pre_reshape->name("pre-reshape");
+ pre_reshape->name("pre-reshape-2");
+ post_reshape->name("post-reshape");
+
+ return post_reshape;
+ }
+
+public:
+ luci::CircleRelu *relu = nullptr;
+ luci::CircleReshape *pre_reshape = nullptr;
+ luci::CircleReshape *pre_reshape_2 = nullptr;
+ luci::CircleReshape *post_reshape = nullptr;
+ luci::CircleConst *pre_shape = nullptr;
+ luci::CircleConst *pre_shape_2 = nullptr;
+ luci::CircleConst *post_shape = nullptr;
+};
+
class AddScalarGraph final : public SimpleGraph
{
protected:
@@ -312,6 +483,22 @@ public:
luci::CircleLogistic *logistic = nullptr;
};
+class LogSoftmaxGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ log_softmax = g.nodes()->create<luci::CircleLogSoftmax>();
+ log_softmax->logits(input);
+ log_softmax->name("log_softmax");
+
+ return log_softmax;
+ }
+
+public:
+ luci::CircleLogSoftmax *log_softmax = nullptr;
+};
+
class MaximumGraph final : public SimpleGraph
{
protected:
@@ -642,6 +829,51 @@ public:
luci::CircleConst *const_value = nullptr;
};
+class ReduceMaxGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ rm = g.nodes()->create<luci::CircleReduceMax>();
+ rindices = g.nodes()->create<luci::CircleConst>();
+
+ rm->dtype(loco::DataType::FLOAT32);
+ rindices->dtype(loco::DataType::S32);
+
+ rm->shape(_shape);
+ rindices->shape({static_cast<uint32_t>(_axes.size())});
+
+ rindices->size<loco::DataType::S32>(_axes.size());
+ for (uint32_t i = 0; i < _axes.size(); ++i)
+ {
+ rindices->at<loco::DataType::S32>(i) = _axes[i];
+ }
+
+ rm->input(input);
+ rm->reduction_indices(rindices);
+ rm->keep_dims(_keep_dims);
+
+ rm->name("reduce_max");
+ rindices->name("rindices");
+
+ return rm;
+ }
+
+public:
+ void keep_dims(bool val) { _keep_dims = val; }
+ void axes(std::vector<int32_t> val) { _axes = val; }
+ void shape(std::initializer_list<uint32_t> val) { _shape = val; }
+
+public:
+ luci::CircleReduceMax *rm = nullptr;
+ luci::CircleConst *rindices = nullptr;
+
+private:
+ bool _keep_dims = true;
+ std::vector<int32_t> _axes = {2, 3};
+ std::initializer_list<uint32_t> _shape = {1, 16, 1, 1};
+};
+
class ReluGraph final : public SimpleGraph
{
protected:
@@ -690,6 +922,111 @@ public:
luci::CircleRsqrt *rsqrt = nullptr;
};
+class SoftmaxGraph final : public SimpleGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ softmax = g.nodes()->create<luci::CircleSoftmax>();
+ softmax->logits(input);
+ softmax->name("softmax");
+
+ return softmax;
+ }
+
+public:
+ luci::CircleSoftmax *softmax = nullptr;
+};
+
+class SplitVGraphlet
+{
+public:
+ SplitVGraphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ // CircleCustom(SplitV)
+ _splitv = g->nodes()->create<luci::CircleSplitV>();
+ _splitv->shape({1, 2, 2, 192});
+ _splitv->dtype(loco::DataType::FLOAT32);
+ _splitv->name("splitv");
+
+ // CircleConst
+ auto size_splits = g->nodes()->create<luci::CircleConst>();
+ size_splits->dtype(loco::DataType::S32);
+ size_splits->shape({3});
+ size_splits->size<loco::DataType::S32>(3);
+ size_splits->at<loco::DataType::S32>(0) = 32;
+ size_splits->at<loco::DataType::S32>(1) = 32;
+ size_splits->at<loco::DataType::S32>(2) = 128;
+
+ // CircleConst
+ auto split_dim = g->nodes()->create<luci::CircleConst>();
+ split_dim->dtype(loco::DataType::S32);
+ split_dim->rank(0);
+ split_dim->size<loco::DataType::S32>(1);
+ split_dim->scalar<loco::DataType::S32>() = 3;
+
+ _splitv->size_splits(size_splits);
+ _splitv->split_dim(split_dim);
+ _splitv->num_split(3);
+
+ // CircleSplitVOut
+ _splitv_out1 = g->nodes()->create<luci::CircleSplitVOut>();
+ _splitv_out1->shape({1, 2, 2, 32});
+ _splitv_out1->dtype(loco::DataType::FLOAT32);
+ _splitv_out1->index(0);
+ _splitv_out1->input(_splitv);
+ _splitv_out1->name("splitv_out1");
+
+ // CircleSplitVOut
+ _splitv_out2 = g->nodes()->create<luci::CircleSplitVOut>();
+ _splitv_out2->shape({1, 2, 2, 32});
+ _splitv_out2->dtype(loco::DataType::FLOAT32);
+ _splitv_out2->index(1);
+ _splitv_out2->input(_splitv);
+ _splitv_out2->name("splitv_out2");
+
+ // CircleSplitVOut
+ _splitv_out3 = g->nodes()->create<luci::CircleSplitVOut>();
+ _splitv_out3->shape({1, 2, 2, 128});
+ _splitv_out3->dtype(loco::DataType::FLOAT32);
+ _splitv_out3->index(2);
+ _splitv_out3->input(_splitv);
+ _splitv_out3->name("splitv_out3");
+ }
+
+public:
+ luci::CircleSplitV *splitv() { return _splitv; }
+
+protected:
+ luci::CircleSplitV *_splitv = nullptr;
+ luci::CircleSplitVOut *_splitv_out1 = nullptr;
+ luci::CircleSplitVOut *_splitv_out2 = nullptr;
+ luci::CircleSplitVOut *_splitv_out3 = nullptr;
+};
+
+class SplitVGraph : public TestIGraphlet, public TestOsGraphlet<3>, public SplitVGraphlet
+{
+public:
+ SplitVGraph() = default;
+
+ void init(void)
+ {
+ TestIGraphlet::init(g(), {1, 2, 2, 192});
+ TestOsGraphlet<3>::init(g(), {{1, 2, 2, 32}, {1, 2, 2, 32}, {1, 2, 2, 128}});
+ SplitVGraphlet::init(g());
+
+ // connect graph
+ _splitv->input(input());
+
+ output(0)->from(_splitv_out1);
+ output(1)->from(_splitv_out2);
+ output(2)->from(_splitv_out3);
+ }
+};
+
class SquaredDifferenceGraph final : public SimpleGraph
{
protected:
@@ -929,8 +1266,11 @@ TEST(ConvertNCHWToNHWC, AddScalar)
auto new_beta = dynamic_cast<luci::CircleConst *>(g.add->y());
EXPECT_NE(nullptr, new_beta);
- EXPECT_EQ(1, new_beta->rank());
+ EXPECT_EQ(4, new_beta->rank());
EXPECT_EQ(1, new_beta->dim(0).value());
+ EXPECT_EQ(1, new_beta->dim(1).value());
+ EXPECT_EQ(1, new_beta->dim(2).value());
+ EXPECT_EQ(1, new_beta->dim(3).value());
check_pre_trans(g.output->from());
}
@@ -1017,6 +1357,26 @@ TEST(ConvertNCHWToNHWC, Logistic)
EXPECT_EQ(16, g.logistic->dim(3).value());
}
+TEST(ConvertNCHWToNHWC, LogSoftmax)
+{
+ LogSoftmaxGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.log_softmax->logits());
+
+ auto log_softmax_succs = loco::succs(g.log_softmax);
+ EXPECT_EQ(1, log_softmax_succs.size());
+ check_post_trans(*log_softmax_succs.begin());
+
+ // Check log_softmax shape
+ EXPECT_EQ(1, g.log_softmax->dim(0).value());
+ EXPECT_EQ(4, g.log_softmax->dim(1).value());
+ EXPECT_EQ(4, g.log_softmax->dim(2).value());
+ EXPECT_EQ(16, g.log_softmax->dim(3).value());
+}
+
TEST(ConvertNCHWToNHWC, Maximum)
{
MaximumGraph g;
@@ -1265,8 +1625,11 @@ TEST(ConvertNCHWToNHWC, MulScalar)
auto new_multiplier = dynamic_cast<luci::CircleConst *>(g.mul->y());
EXPECT_NE(nullptr, new_multiplier);
- EXPECT_EQ(1, new_multiplier->rank());
+ EXPECT_EQ(4, new_multiplier->rank());
EXPECT_EQ(1, new_multiplier->dim(0).value());
+ EXPECT_EQ(1, new_multiplier->dim(1).value());
+ EXPECT_EQ(1, new_multiplier->dim(2).value());
+ EXPECT_EQ(1, new_multiplier->dim(3).value());
check_pre_trans(g.output->from());
}
@@ -1451,6 +1814,85 @@ TEST(ConvertNCHWToNHWC, Preserve_Input_Output)
}
}
+TEST(ConvertNCHWToNHWC, ReduceMax)
+{
+ ReduceMaxGraph g;
+ g.init();
+
+ run_phase(&g.g, false, false);
+
+ check_pre_trans(g.rm->input());
+
+ auto rm_succs = loco::succs(g.rm);
+ EXPECT_EQ(1, rm_succs.size());
+ check_post_trans(*rm_succs.begin());
+
+ auto new_rindices = dynamic_cast<luci::CircleConst *>(g.rm->reduction_indices());
+ EXPECT_NE(nullptr, new_rindices);
+ EXPECT_EQ(1, new_rindices->rank());
+ EXPECT_EQ(2, new_rindices->dim(0).value());
+ EXPECT_EQ(2, new_rindices->size<loco::DataType::S32>());
+ EXPECT_EQ(1, new_rindices->at<loco::DataType::S32>(0));
+ EXPECT_EQ(2, new_rindices->at<loco::DataType::S32>(1));
+}
+
+TEST(ConvertNCHWToNHWC, ReduceMax_keep_dims_false)
+{
+ struct TC
+ {
+ std::vector<int32_t> nchw_ind;
+ std::vector<int32_t> nhwc_ind;
+ std::initializer_list<uint32_t> shape;
+ bool needs_transpose = false;
+ };
+
+ uint32_t n = 1;
+ uint32_t c = 16;
+ uint32_t h = 4;
+ uint32_t w = 4;
+
+ std::vector<TC> test_cases{{{0}, {0}, {c, h, w}, true}, {{1}, {3}, {n, h, w}, false},
+ {{2}, {1}, {n, c, w}, true}, {{3}, {2}, {n, c, h}, true},
+ {{0, 1}, {0, 3}, {h, w}, false}, {{0, 2}, {0, 1}, {c, w}, true},
+ {{0, 3}, {0, 2}, {c, h}, true}, {{1, 2}, {3, 1}, {n, w}, false},
+ {{1, 3}, {3, 2}, {n, h}, false}, {{2, 3}, {1, 2}, {n, c}, false},
+ {{0, 1, 2}, {0, 3, 1}, {w}, false}};
+
+ for (auto &tc : test_cases)
+ {
+ ReduceMaxGraph g;
+ g.keep_dims(false);
+ g.axes(tc.nchw_ind);
+ g.shape(tc.shape);
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.rm->input());
+
+ auto rm_succs = loco::succs(g.rm);
+ EXPECT_EQ(1, rm_succs.size());
+ if (tc.needs_transpose)
+ {
+ EXPECT_NE(nullptr, dynamic_cast<luci::CircleTranspose *>(*rm_succs.begin()));
+ }
+ else
+ {
+ EXPECT_NE(nullptr, dynamic_cast<luci::CircleOutput *>(*rm_succs.begin()));
+ }
+
+ auto new_rindices = dynamic_cast<luci::CircleConst *>(g.rm->reduction_indices());
+ EXPECT_NE(nullptr, new_rindices);
+ EXPECT_EQ(1, new_rindices->rank());
+ EXPECT_EQ(tc.nhwc_ind.size(), new_rindices->dim(0).value());
+ EXPECT_EQ(tc.nhwc_ind.size(), new_rindices->size<loco::DataType::S32>());
+ for (uint32_t i = 0; i < tc.nhwc_ind.size(); ++i)
+ {
+ EXPECT_EQ(tc.nhwc_ind[i], new_rindices->at<loco::DataType::S32>(i));
+ }
+ }
+}
+
TEST(ConvertNCHWToNHWC, Relu)
{
ReluGraph g;
@@ -1511,6 +1953,57 @@ TEST(ConvertNCHWToNHWC, Rsqrt)
EXPECT_EQ(16, g.rsqrt->dim(3).value());
}
+TEST(ConvertNCHWToNHWC, Softmax)
+{
+ SoftmaxGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.softmax->logits());
+
+ auto softmax_succs = loco::succs(g.softmax);
+ EXPECT_EQ(1, softmax_succs.size());
+ check_post_trans(*softmax_succs.begin());
+
+ // Check softmax shape
+ EXPECT_EQ(1, g.softmax->dim(0).value());
+ EXPECT_EQ(4, g.softmax->dim(1).value());
+ EXPECT_EQ(4, g.softmax->dim(2).value());
+ EXPECT_EQ(16, g.softmax->dim(3).value());
+}
+
+TEST(ConvertNCHWToNHWC, SplitV)
+{
+ SplitVGraph g;
+ g.init();
+
+ run_phase(g.g(), true, true);
+
+ check_pre_trans(g.splitv()->input());
+
+ auto splitv_succs = loco::succs(g.splitv());
+ for (auto svo : loco::succs(g.splitv()))
+ {
+ for (auto succ : loco::succs(svo))
+ {
+ check_post_trans(succ);
+ }
+ }
+
+ // Check splitv() shape
+ EXPECT_EQ(1, g.splitv()->dim(0).value());
+ EXPECT_EQ(2, g.splitv()->dim(1).value());
+ EXPECT_EQ(192, g.splitv()->dim(2).value());
+ EXPECT_EQ(2, g.splitv()->dim(3).value());
+
+ // Check axis
+ auto axis = dynamic_cast<luci::CircleConst *>(g.splitv()->split_dim());
+ EXPECT_NE(nullptr, axis);
+ EXPECT_EQ(1, axis->size<loco::DataType::S32>());
+ EXPECT_EQ(2, axis->at<loco::DataType::S32>(0));
+}
+
TEST(ConvertNCHWToNHWC, SquaredDifference)
{
SquaredDifferenceGraph g;
@@ -1602,3 +2095,31 @@ TEST(ConvertNCHWToNHWC, SubScalar)
check_pre_trans(g.output->from());
}
+
+TEST(ConvertNCHWToNHWC, Not_Closed_Case1_NEG)
+{
+ NoPostReshapeGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.relu->features());
+
+ auto relu_succs = loco::succs(g.relu);
+ EXPECT_EQ(1, relu_succs.size());
+ check_post_trans(*relu_succs.begin());
+}
+
+TEST(ConvertNCHWToNHWC, Not_Closed_Case2_NEG)
+{
+ ReluNotClosedGraph g;
+ g.init();
+
+ run_phase(&g.g, true, true);
+
+ check_pre_trans(g.relu->features());
+
+ auto relu_succs = loco::succs(g.relu);
+ EXPECT_EQ(1, relu_succs.size());
+ check_post_trans(*relu_succs.begin());
+}
diff --git a/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp b/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp
index 11970fff5..72f590135 100644
--- a/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp
+++ b/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp
@@ -184,8 +184,63 @@ struct FakeQuantize final : public luci::CircleNodeMutableVisitor<void>
// For non-const activation, insert Quantize-Dequantize Ops
// and dequantize the node
- void visit(luci::CircleConv2D *node) { fq_activation(node); }
void visit(luci::CircleAdd *node) { fq_activation(node); }
+ void visit(luci::CircleAveragePool2D *node) { fq_activation(node); }
+ void visit(luci::CircleBatchMatMul *node) { fq_activation(node); }
+ void visit(luci::CircleConv2D *node) { fq_activation(node); }
+ void visit(luci::CircleDepthwiseConv2D *node) { fq_activation(node); }
+ void visit(luci::CircleDiv *node) { fq_activation(node); }
+ void visit(luci::CircleFullyConnected *node) { fq_activation(node); }
+ void visit(luci::CircleInstanceNorm *node) { fq_activation(node); }
+ void visit(luci::CircleLeakyRelu *node) { fq_activation(node); }
+ void visit(luci::CircleLogistic *node) { fq_activation(node); }
+ void visit(luci::CircleLogSoftmax *node) { fq_activation(node); }
+ void visit(luci::CircleMaxPool2D *node) { fq_activation(node); }
+ void visit(luci::CircleMul *node) { fq_activation(node); }
+ void visit(luci::CircleNeg *node) { fq_activation(node); }
+ void visit(luci::CirclePad *node) { fq_activation(node); }
+ void visit(luci::CirclePRelu *node) { fq_activation(node); }
+ void visit(luci::CircleMean *node) { fq_activation(node); }
+ void visit(luci::CircleReduceMax *node) { fq_activation(node); }
+ void visit(luci::CircleRelu *node) { fq_activation(node); }
+ void visit(luci::CircleRelu6 *node) { fq_activation(node); }
+ void visit(luci::CircleResizeBilinear *node) { fq_activation(node); }
+ void visit(luci::CircleResizeNearestNeighbor *node) { fq_activation(node); }
+ void visit(luci::CircleRsqrt *node) { fq_activation(node); }
+ void visit(luci::CircleSoftmax *node) { fq_activation(node); }
+ void visit(luci::CircleSqrt *node) { fq_activation(node); }
+ void visit(luci::CircleTanh *node) { fq_activation(node); }
+ void visit(luci::CircleTransposeConv *node) { fq_activation(node); }
+
+ // For Ops that do not change the value of input, do nothing
+ // (dtype will be automatically updated by type inference)
+ void visit(luci::CircleCast *) {}
+ void visit(luci::CircleConcatenation *) {}
+ void visit(luci::CircleGather *) {}
+ void visit(luci::CircleSlice *) {}
+ void visit(luci::CircleStridedSlice *) {}
+ void visit(luci::CircleReshape *) {}
+ void visit(luci::CircleSplit *) {}
+ void visit(luci::CircleSplitOut *) {}
+ void visit(luci::CircleSplitV *) {}
+ void visit(luci::CircleSplitVOut *) {}
+ void visit(luci::CircleTranspose *) {}
+
+ // For Ops that return index, fake quantization is unnecessary
+ void visit(luci::CircleArgMax *) {}
+
+ // Virtual node
+ void visit(luci::CircleOutputExclude *) {}
+
+ void visit(luci::CircleQuantize *node)
+ {
+ RETURN_UNLESS(is_quant_act(node));
+
+ insert_dequantize(node);
+ }
+
+ // Dequantize Op does nothing in fp32 model
+ void visit(luci::CircleDequantize *) {}
};
#undef RETURN_UNLESS
diff --git a/compiler/luci/pass/src/FoldDensifyPass.cpp b/compiler/luci/pass/src/FoldDensifyPass.cpp
new file mode 100644
index 000000000..5ddc743e5
--- /dev/null
+++ b/compiler/luci/pass/src/FoldDensifyPass.cpp
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldDensifyPass.h"
+#include "helpers/SparsityFormatConverter.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+#include <cassert>
+#include <vector>
+
+namespace
+{
+
+bool is_foldable_const(luci::CircleConst *node)
+{
+ if (node->sparsityparam() == nullptr)
+ return false;
+
+ if (node->dtype() == loco::DataType::FLOAT32)
+ return true;
+ if (node->dtype() == loco::DataType::FLOAT16)
+ return true;
+
+ return false;
+}
+
+luci::CircleConst *densified_const_node(luci::CircleConst *const_node)
+{
+ assert(const_node->sparsityparam());
+
+ auto name = const_node->name();
+ assert(name.length() > 0);
+ auto g = const_node->graph();
+ auto new_const_node = g->nodes()->create<luci::CircleConst>();
+
+ new_const_node->dtype(const_node->dtype());
+ new_const_node->rank(const_node->rank());
+
+ uint32_t dim_size = 1;
+ std::vector<int> dense_shape;
+ for (uint32_t i = 0; i < new_const_node->rank(); ++i)
+ {
+ assert(const_node->dim(i).known());
+ new_const_node->dim(i) = const_node->dim(i);
+
+ uint32_t value = const_node->dim(i).value();
+ dim_size *= value;
+ dense_shape.emplace_back(static_cast<int32_t>(value));
+ }
+
+ if (const_node->dtype() == loco::DataType::FLOAT32)
+ new_const_node->size<loco::DataType::FLOAT32>(dim_size);
+ else
+ {
+ assert(const_node->dtype() == loco::DataType::FLOAT16);
+ new_const_node->size<loco::DataType::FLOAT16>(dim_size);
+ }
+
+ new_const_node->shape_status(luci::ShapeStatus::VALID);
+ new_const_node->name(name + "_DS");
+
+ if (const_node->dtype() == loco::DataType::FLOAT32)
+ {
+ auto const_items = const_node->size<loco::DataType::FLOAT32>();
+ auto f_data = std::make_unique<float[]>(const_items);
+ for (size_t i = 0; i < const_items; ++i)
+ f_data[i] = const_node->at<loco::DataType::FLOAT32>(i);
+
+ sparsity::TfLiteSparsity sp = to_tflite_sparsity(const_node->sparsityparam());
+ sparsity::FormatConverter<float> converter(dense_shape, sp);
+ converter.SparseToDense(f_data.get());
+ const auto &data_dense = converter.GetData();
+ assert(data_dense.size() == dim_size);
+
+ for (uint32_t i = 0; i < dim_size; ++i)
+ new_const_node->at<loco::DataType::FLOAT32>(i) = data_dense[i];
+
+ luci::freeTfLiteSparsity(sp);
+ }
+ else
+ {
+ assert(const_node->dtype() == loco::DataType::FLOAT16);
+
+ auto const_items = const_node->size<loco::DataType::FLOAT16>();
+ auto f_data = std::make_unique<uint16_t[]>(const_items);
+ for (size_t i = 0; i < const_items; ++i)
+ f_data[i] = const_node->at<loco::DataType::FLOAT16>(i);
+
+ // Primitive type for FLOAT16 is UINT16
+ sparsity::TfLiteSparsity sp = to_tflite_sparsity(const_node->sparsityparam());
+ sparsity::FormatConverter<uint16_t> converter(dense_shape, sp);
+ converter.SparseToDense(f_data.get());
+ const auto &data_dense = converter.GetData();
+ assert(data_dense.size() == dim_size);
+ for (uint32_t i = 0; i < dim_size; ++i)
+ new_const_node->at<loco::DataType::FLOAT16>(i) = data_dense[i];
+
+ luci::freeTfLiteSparsity(sp);
+ }
+
+ return new_const_node;
+}
+
+/**
+ * @brief Fold Densify if input is Sparse Constant
+ */
+bool fold_densify(luci::CircleDensify *densify)
+{
+ auto const_input = dynamic_cast<luci::CircleConst *>(densify->input());
+ if (not const_input)
+ return false;
+
+ if (not is_foldable_const(const_input))
+ return false;
+
+ auto dense_const = densified_const_node(const_input);
+ assert(dense_const);
+
+ loco::replace(densify).with(dense_const);
+ luci::add_origin(dense_const, luci::composite_origin(
+ {luci::get_origin(densify), luci::get_origin(const_input)}));
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ *
+ * [CircleConst](sparse)
+ * |
+ * [CircleDensify]
+ * |
+ * [CircleNode]
+ * |
+ *
+ * AFTER
+ *
+ * [CircleConst](dense) [CircleConst](sparse)
+ * | |
+ * [CircleNode] [CircleDensify]
+ * |
+ */
+bool FoldDensifyPass::run(loco::Graph *g)
+{
+ bool changed = false;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto densify = dynamic_cast<luci::CircleDensify *>(node))
+ {
+ if (fold_densify(densify))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FoldDensifyPass.test.cpp b/compiler/luci/pass/src/FoldDensifyPass.test.cpp
new file mode 100644
index 000000000..2f9736f49
--- /dev/null
+++ b/compiler/luci/pass/src/FoldDensifyPass.test.cpp
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldDensifyPass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class FoldDensifyPassGraph : public luci::ConstantFoldingAddTestGraph
+{
+public:
+ FoldDensifyPassGraph(std::initializer_list<uint32_t> shape)
+ : luci::ConstantFoldingAddTestGraph(shape, loco::DataType::FLOAT32)
+ {
+ _densify = _g.nodes()->create<luci::CircleDensify>();
+ _x = _g.nodes()->create<luci::CircleConst>();
+
+ _densify->dtype(loco::DataType::FLOAT32);
+ _x->dtype(loco::DataType::FLOAT32);
+
+ _densify->shape(shape);
+ _x->shape(shape);
+
+ _densify->input(_x);
+
+ _densify->name("densify");
+ _x->name("x");
+ }
+
+ loco::Node *createFoldedPattern() override { return _densify; }
+
+public:
+ void fill_const_dense(void)
+ {
+ uint32_t num_elems = 1;
+ for (uint32_t r = 0; r < _x->rank(); ++r)
+ num_elems *= _x->dim(r).value();
+
+ _x->size<loco::DataType::FLOAT32>(num_elems);
+ for (uint32_t i = 0; i < num_elems; i++)
+ _x->at<loco::DataType::FLOAT32>(i) = static_cast<float>(i + 1);
+ }
+
+ void fill_const_sparse(void)
+ {
+ // fill 4x4 of
+ // [[1 0 0 0]
+ // [0 2 0 0]
+ // [0 0 3 0]
+ // [0 0 0 4]]
+
+ // values of 1.0, 2.0, 3.0, 4.0
+ uint32_t udata[] = {0x3f800000, 0x40000000, 0x40400000, 0x40800000};
+ float *fdata = reinterpret_cast<float *>(udata);
+
+ _x->size<loco::DataType::FLOAT32>(4);
+ for (uint32_t i = 0; i < 4; i++)
+ _x->at<loco::DataType::FLOAT32>(i) = fdata[i];
+
+ auto sparsityparam = std::make_unique<luci::SparsityParam>();
+ sparsityparam->traversal_order = std::vector<int32_t>({0, 1});
+ sparsityparam->block_map = std::vector<int32_t>({});
+
+ auto dm0 = luci::DimMetaData(luci::DimensionType::DENSE, 4);
+
+ std::vector<int32_t> as_vec = {0, 1, 2, 3, 4};
+ std::vector<int32_t> ai_vec = {0, 1, 2, 3};
+ auto as = luci::SparseIndexVector(luci::SparseIndexVectorType::I32, as_vec);
+ auto ai = luci::SparseIndexVector(luci::SparseIndexVectorType::I32, ai_vec);
+ auto dm1 = luci::DimMetaData(luci::DimensionType::SPARSE_CSR, 0, as, ai);
+ sparsityparam->dim_metadata.emplace_back(dm0);
+ sparsityparam->dim_metadata.emplace_back(dm1);
+
+ _x->sparsityparam(std::move(sparsityparam));
+ }
+
+protected:
+ luci::CircleDensify *_densify = nullptr;
+ luci::CircleConst *_x = nullptr;
+};
+
+class FoldDensifyPassGraphTest : public FoldDensifyPassGraph, public ::testing::Test
+{
+public:
+ FoldDensifyPassGraphTest() : FoldDensifyPassGraph({4, 4}) {}
+
+ virtual void SetUp() { init(); }
+};
+
+} // namespace
+
+TEST(FoldDensifyPassGraph, name)
+{
+ luci::FoldDensifyPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(FoldDensifyPassGraphTest, no_sparsity_param_NEG)
+{
+ fill_const_dense();
+
+ luci::FoldDensifyPass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(nullptr, folded_const);
+}
+
+TEST_F(FoldDensifyPassGraphTest, sparsity_param)
+{
+ fill_const_sparse();
+
+ luci::FoldDensifyPass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ EXPECT_EQ(2, folded_const->rank());
+ EXPECT_EQ(4, folded_const->dim(0).value());
+ EXPECT_EQ(4, folded_const->dim(1).value());
+ EXPECT_EQ(16, folded_const->size<loco::DataType::FLOAT32>());
+ for (int y = 0; y < 4; ++y)
+ {
+ for (int x = 0; x < 4; ++x)
+ {
+ float ovalue = folded_const->at<loco::DataType::FLOAT32>(y * 4 + x);
+ float fvalue = 0.0;
+ if (x == y)
+ {
+ // diagonal position
+ fvalue = static_cast<float>(y + 1);
+ }
+ EXPECT_EQ(fvalue, ovalue);
+ }
+ }
+}
diff --git a/compiler/luci/pass/src/FoldDequantizePass.cpp b/compiler/luci/pass/src/FoldDequantizePass.cpp
index 3dd4f8cea..b6526deb0 100644
--- a/compiler/luci/pass/src/FoldDequantizePass.cpp
+++ b/compiler/luci/pass/src/FoldDequantizePass.cpp
@@ -19,6 +19,8 @@
#include <luci/IR/CircleNodes.h>
#include <luci/Profile/CircleNodeOrigin.h>
+#include <fp16.h>
+
namespace
{
@@ -32,6 +34,9 @@ bool is_hybrid_kernel_supported(loco::Node *node)
bool is_foldable_const(luci::CircleConst *node)
{
+ if (node->dtype() == loco::DataType::FLOAT16)
+ return true;
+
if (node->quantparam() == nullptr)
return false;
@@ -39,17 +44,18 @@ bool is_foldable_const(luci::CircleConst *node)
return true;
if (node->dtype() == loco::DataType::U8)
return true;
+ if (node->dtype() == loco::DataType::S16)
+ return true;
+ if (node->dtype() == loco::DataType::S32)
+ return true;
+ if (node->dtype() == loco::DataType::S64)
+ return true;
return false;
}
luci::CircleConst *dequantized_const_node(luci::CircleConst *const_node)
{
- if (const_node->quantparam() == nullptr)
- {
- throw std::runtime_error("Given constant node has no quantization parameter");
- }
-
auto name = const_node->name();
assert(name.length() > 0);
auto g = const_node->graph();
@@ -67,38 +73,70 @@ luci::CircleConst *dequantized_const_node(luci::CircleConst *const_node)
new_const_node->shape_status(luci::ShapeStatus::VALID);
new_const_node->name(name + "_DQ");
+ if (const_node->dtype() == loco::DataType::FLOAT16)
+ {
+ for (uint32_t i = 0; i < new_const_node->size<loco::DataType::FLOAT32>(); ++i)
+ {
+ auto raw = const_node->at<loco::DataType::FLOAT16>(i);
+ new_const_node->at<loco::DataType::FLOAT32>(i) = fp16_ieee_to_fp32_value(raw);
+ }
+ return new_const_node;
+ }
+
+ if (const_node->quantparam() == nullptr)
+ {
+ throw std::runtime_error("Given constant node has no quantization parameter");
+ }
+
const int32_t q_dim = const_node->quantparam()->quantized_dimension;
- const int32_t q_dim_value = const_node->dim(q_dim).value();
+ // For scalar, q_dim_value is 1
+ // For non-scalar, q_dim_value is the size of quantized dimension
+ const int32_t q_dim_value = const_node->rank() == 0 ? 1 : const_node->dim(q_dim).value();
int32_t right_count = q_dim_value;
for (uint32_t i = q_dim + 1; i < const_node->rank(); ++i)
right_count *= const_node->dim(i).value();
- if (const_node->dtype() == loco::DataType::S8)
+ for (uint32_t i = 0; i < new_const_node->size<loco::DataType::FLOAT32>(); ++i)
{
- for (uint32_t i = 0; i < const_node->size<loco::DataType::S8>(); ++i)
- {
- uint32_t qd = (i % right_count) / (right_count / q_dim_value);
- if (qd >= const_node->quantparam()->zerop.size())
- qd = 0;
+ uint32_t qd = (i % right_count) / (right_count / q_dim_value);
+ if (qd >= const_node->quantparam()->zerop.size())
+ qd = 0;
- new_const_node->at<loco::DataType::FLOAT32>(i) =
- (float)(const_node->at<loco::DataType::S8>(i) - const_node->quantparam()->zerop.at(qd)) *
- const_node->quantparam()->scale.at(qd);
- }
- }
- else
- {
- for (uint32_t i = 0; i < const_node->size<loco::DataType::U8>(); ++i)
+ switch (const_node->dtype())
{
- uint32_t qd = (i % right_count) / (right_count / q_dim_value);
- if (qd >= const_node->quantparam()->zerop.size())
- qd = 0;
-
- new_const_node->at<loco::DataType::FLOAT32>(i) =
- (float)((int)const_node->at<loco::DataType::U8>(i) -
- const_node->quantparam()->zerop.at(qd)) *
- const_node->quantparam()->scale.at(qd);
+ case loco::DataType::S8:
+ new_const_node->at<loco::DataType::FLOAT32>(i) =
+ static_cast<float>(const_node->at<loco::DataType::S8>(i) -
+ const_node->quantparam()->zerop.at(qd)) *
+ const_node->quantparam()->scale.at(qd);
+ break;
+ case loco::DataType::S16:
+ new_const_node->at<loco::DataType::FLOAT32>(i) =
+ static_cast<float>(const_node->at<loco::DataType::S16>(i) -
+ const_node->quantparam()->zerop.at(qd)) *
+ const_node->quantparam()->scale.at(qd);
+ break;
+ case loco::DataType::S32:
+ new_const_node->at<loco::DataType::FLOAT32>(i) =
+ static_cast<float>(const_node->at<loco::DataType::S32>(i) -
+ const_node->quantparam()->zerop.at(qd)) *
+ const_node->quantparam()->scale.at(qd);
+ break;
+ case loco::DataType::S64:
+ new_const_node->at<loco::DataType::FLOAT32>(i) =
+ static_cast<float>(const_node->at<loco::DataType::S64>(i) -
+ const_node->quantparam()->zerop.at(qd)) *
+ const_node->quantparam()->scale.at(qd);
+ break;
+ case loco::DataType::U8:
+ new_const_node->at<loco::DataType::FLOAT32>(i) =
+ static_cast<float>(const_node->at<loco::DataType::U8>(i) -
+ const_node->quantparam()->zerop.at(qd)) *
+ const_node->quantparam()->scale.at(qd);
+ break;
+ default:
+ throw std::runtime_error("Not supported dtype for FoldDequantizePass");
}
}
@@ -160,7 +198,7 @@ bool FoldDequantizePass::run(loco::Graph *g)
{
bool changed = false;
- for (auto node : loco::all_nodes(g))
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
if (auto circle_dequant = dynamic_cast<luci::CircleDequantize *>(node))
{
diff --git a/compiler/luci/pass/src/FoldDequantizePass.test.cpp b/compiler/luci/pass/src/FoldDequantizePass.test.cpp
index d82a7bc87..fb5b6adc0 100644
--- a/compiler/luci/pass/src/FoldDequantizePass.test.cpp
+++ b/compiler/luci/pass/src/FoldDequantizePass.test.cpp
@@ -15,12 +15,389 @@
*/
#include "luci/Pass/FoldDequantizePass.h"
+#include "PassTestGraphs.h"
#include <gtest/gtest.h>
+namespace
+{
+
+template <loco::DataType DT>
+class FoldDequantizeTest : public luci::ConstantFoldingAddTestGraph, public ::testing::Test
+{
+public:
+ FoldDequantizeTest() : luci::ConstantFoldingAddTestGraph({2, 2, 2}, DT) {}
+
+ virtual void SetUp() { init(); }
+
+ loco::Node *createFoldedPattern() override
+ {
+ _dequantize = _g.nodes()->create<luci::CircleDequantize>();
+ _input = _g.nodes()->create<luci::CircleConst>();
+
+ _dequantize->dtype(loco::DataType::FLOAT32);
+ _input->dtype(DT);
+
+ _input->shape({2, 2, 2});
+
+ _input->size<DT>(8);
+ _input->at<DT>(0) = 0;
+ _input->at<DT>(1) = 1;
+ _input->at<DT>(2) = 2;
+ _input->at<DT>(3) = 3;
+ _input->at<DT>(4) = 4;
+ _input->at<DT>(5) = 5;
+ _input->at<DT>(6) = 6;
+ _input->at<DT>(7) = 7;
+
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ qparam->quantized_dimension = 1;
+ qparam->scale.push_back(5.0);
+ qparam->scale.push_back(10.0);
+ qparam->zerop.push_back(1);
+ qparam->zerop.push_back(2);
+ _input->quantparam(std::move(qparam));
+
+ _dequantize->input(_input);
+
+ _dequantize->name("dequantize");
+ _input->name("input");
+
+ return _dequantize;
+ }
+
+ void createScalarPattern()
+ {
+ _input->rank(0);
+ _input->size<DT>(1);
+ _input->at<DT>(0) = 1;
+
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ qparam->quantized_dimension = 0;
+ qparam->scale.push_back(1.0);
+ qparam->zerop.push_back(0);
+ _input->quantparam(std::move(qparam));
+ }
+
+ void createNotFoldablePattern() { _input->quantparam(nullptr); }
+
+protected:
+ luci::CircleDequantize *_dequantize = nullptr;
+ luci::CircleConst *_input = nullptr;
+};
+
+class S8FoldDequantizeTest : public FoldDequantizeTest<loco::DataType::S8>
+{
+};
+
+class S16FoldDequantizeTest : public FoldDequantizeTest<loco::DataType::S16>
+{
+};
+
+class S32FoldDequantizeTest : public FoldDequantizeTest<loco::DataType::S32>
+{
+};
+
+class S64FoldDequantizeTest : public FoldDequantizeTest<loco::DataType::S64>
+{
+};
+
+class U8FoldDequantizeTest : public FoldDequantizeTest<loco::DataType::U8>
+{
+};
+
+class F16FoldDequantizeTest : public luci::ConstantFoldingTestGraph, public ::testing::Test
+{
+public:
+ F16FoldDequantizeTest() : ConstantFoldingTestGraph({2, 2}, loco::DataType::FLOAT16) {}
+
+ virtual void SetUp() { init(); }
+
+ loco::Node *createFoldedPattern() override
+ {
+ const auto DT = loco::DataType::FLOAT16;
+ _dequantize = _g.nodes()->create<luci::CircleDequantize>();
+ _f16const = _g.nodes()->create<luci::CircleConst>();
+
+ _dequantize->dtype(loco::DataType::FLOAT32);
+ _f16const->dtype(DT);
+
+ _f16const->shape({2, 2});
+
+ _f16const->size<loco::DataType::FLOAT16>(4);
+ _f16const->at<DT>(0) = 49408; // -2.5f
+ _f16const->at<DT>(1) = 47104; // -0.5f
+ _f16const->at<DT>(2) = 0; // 0.0f
+ _f16const->at<DT>(3) = 15872; // 1.5f
+ // NOTE how to get uint16_t value of float16 ?
+ // Use compiler/souschef/src/Gaussian.cpp GaussianFloat16DataChef::generate()
+ // uint16_t value = fp16_ieee_from_fp32_value(-2.5);
+ // printf("-2.5 = %u\r\n", value);
+
+ _dequantize->input(_f16const);
+
+ _dequantize->name("dequantize");
+ _f16const->name("input");
+
+ _output->from(_dequantize);
+
+ return _dequantize;
+ }
+
+ void createNotFoldablePattern() { _dequantize->input(_input); }
+
+protected:
+ luci::CircleConst *getFoldedPattern() override
+ {
+ return dynamic_cast<luci::CircleConst *>(_output->from());
+ }
+
+ void init() override { createFoldedPattern(); }
+
+protected:
+ luci::CircleDequantize *_dequantize = nullptr;
+ luci::CircleConst *_f16const = nullptr;
+};
+
+} // namespace
+
TEST(FoldDequantizePassTest, name)
{
luci::FoldDequantizePass pass;
auto const name = pass.name();
ASSERT_NE(nullptr, name);
}
+
+TEST_F(U8FoldDequantizeTest, fold_dequant_basic)
+{
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+ EXPECT_EQ(3, folded_const->rank());
+ EXPECT_EQ(2, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->dim(1).value());
+ EXPECT_EQ(2, folded_const->dim(2).value());
+ EXPECT_EQ(-5.0, folded_const->at<loco::DataType::FLOAT32>(0));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(1));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2));
+ EXPECT_EQ(10.0, folded_const->at<loco::DataType::FLOAT32>(3));
+ EXPECT_EQ(15.0, folded_const->at<loco::DataType::FLOAT32>(4));
+ EXPECT_EQ(20.0, folded_const->at<loco::DataType::FLOAT32>(5));
+ EXPECT_EQ(40.0, folded_const->at<loco::DataType::FLOAT32>(6));
+ EXPECT_EQ(50.0, folded_const->at<loco::DataType::FLOAT32>(7));
+}
+
+TEST_F(U8FoldDequantizeTest, fold_dequant_basic_NEG)
+{
+ createNotFoldablePattern();
+
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(nullptr, folded_const);
+}
+
+TEST_F(S8FoldDequantizeTest, fold_dequant_basic)
+{
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+ EXPECT_EQ(3, folded_const->rank());
+ EXPECT_EQ(2, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->dim(1).value());
+ EXPECT_EQ(2, folded_const->dim(2).value());
+ EXPECT_EQ(-5.0, folded_const->at<loco::DataType::FLOAT32>(0));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(1));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2));
+ EXPECT_EQ(10.0, folded_const->at<loco::DataType::FLOAT32>(3));
+ EXPECT_EQ(15.0, folded_const->at<loco::DataType::FLOAT32>(4));
+ EXPECT_EQ(20.0, folded_const->at<loco::DataType::FLOAT32>(5));
+ EXPECT_EQ(40.0, folded_const->at<loco::DataType::FLOAT32>(6));
+ EXPECT_EQ(50.0, folded_const->at<loco::DataType::FLOAT32>(7));
+}
+
+TEST_F(S8FoldDequantizeTest, fold_dequant_basic_NEG)
+{
+ createNotFoldablePattern();
+
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(nullptr, folded_const);
+}
+
+TEST_F(S16FoldDequantizeTest, fold_dequant_basic)
+{
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+ EXPECT_EQ(3, folded_const->rank());
+ EXPECT_EQ(2, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->dim(1).value());
+ EXPECT_EQ(2, folded_const->dim(2).value());
+ EXPECT_EQ(-5.0, folded_const->at<loco::DataType::FLOAT32>(0));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(1));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2));
+ EXPECT_EQ(10.0, folded_const->at<loco::DataType::FLOAT32>(3));
+ EXPECT_EQ(15.0, folded_const->at<loco::DataType::FLOAT32>(4));
+ EXPECT_EQ(20.0, folded_const->at<loco::DataType::FLOAT32>(5));
+ EXPECT_EQ(40.0, folded_const->at<loco::DataType::FLOAT32>(6));
+ EXPECT_EQ(50.0, folded_const->at<loco::DataType::FLOAT32>(7));
+}
+
+TEST_F(S16FoldDequantizeTest, fold_dequant_basic_NEG)
+{
+ createNotFoldablePattern();
+
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(nullptr, folded_const);
+}
+
+TEST_F(S32FoldDequantizeTest, fold_dequant_basic)
+{
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+ EXPECT_EQ(3, folded_const->rank());
+ EXPECT_EQ(2, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->dim(1).value());
+ EXPECT_EQ(2, folded_const->dim(2).value());
+ EXPECT_EQ(-5.0, folded_const->at<loco::DataType::FLOAT32>(0));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(1));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2));
+ EXPECT_EQ(10.0, folded_const->at<loco::DataType::FLOAT32>(3));
+ EXPECT_EQ(15.0, folded_const->at<loco::DataType::FLOAT32>(4));
+ EXPECT_EQ(20.0, folded_const->at<loco::DataType::FLOAT32>(5));
+ EXPECT_EQ(40.0, folded_const->at<loco::DataType::FLOAT32>(6));
+ EXPECT_EQ(50.0, folded_const->at<loco::DataType::FLOAT32>(7));
+}
+
+TEST_F(S32FoldDequantizeTest, fold_dequant_basic_NEG)
+{
+ createNotFoldablePattern();
+
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(nullptr, folded_const);
+}
+
+TEST_F(S64FoldDequantizeTest, fold_dequant_basic)
+{
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+ EXPECT_EQ(3, folded_const->rank());
+ EXPECT_EQ(2, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->dim(1).value());
+ EXPECT_EQ(2, folded_const->dim(2).value());
+ EXPECT_EQ(-5.0, folded_const->at<loco::DataType::FLOAT32>(0));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(1));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2));
+ EXPECT_EQ(10.0, folded_const->at<loco::DataType::FLOAT32>(3));
+ EXPECT_EQ(15.0, folded_const->at<loco::DataType::FLOAT32>(4));
+ EXPECT_EQ(20.0, folded_const->at<loco::DataType::FLOAT32>(5));
+ EXPECT_EQ(40.0, folded_const->at<loco::DataType::FLOAT32>(6));
+ EXPECT_EQ(50.0, folded_const->at<loco::DataType::FLOAT32>(7));
+}
+
+TEST_F(S64FoldDequantizeTest, fold_dequant_basic_NEG)
+{
+ createNotFoldablePattern();
+
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(nullptr, folded_const);
+}
+
+TEST_F(U8FoldDequantizeTest, fold_dequant_scalar)
+{
+ createScalarPattern();
+
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+ EXPECT_EQ(0, folded_const->rank());
+ EXPECT_EQ(1.0, folded_const->at<loco::DataType::FLOAT32>(0));
+}
+
+TEST_F(F16FoldDequantizeTest, fold_dequant_basic)
+{
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_NE(nullptr, folded_const);
+
+ // Check type, shape, values of folded const
+ EXPECT_EQ(loco::DataType::FLOAT32, folded_const->dtype());
+ EXPECT_EQ(2, folded_const->rank());
+ EXPECT_EQ(2, folded_const->dim(0).value());
+ EXPECT_EQ(2, folded_const->dim(1).value());
+ EXPECT_EQ(-2.5, folded_const->at<loco::DataType::FLOAT32>(0));
+ EXPECT_EQ(-0.5, folded_const->at<loco::DataType::FLOAT32>(1));
+ EXPECT_EQ(0.0, folded_const->at<loco::DataType::FLOAT32>(2));
+ EXPECT_EQ(1.5, folded_const->at<loco::DataType::FLOAT32>(3));
+}
+
+TEST_F(F16FoldDequantizeTest, fold_dequant_basic_NEG)
+{
+ createNotFoldablePattern();
+
+ luci::FoldDequantizePass pass;
+ while (pass.run(graph()))
+ ;
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(nullptr, folded_const);
+}
diff --git a/compiler/luci/pass/src/FoldSparseToDensePass.cpp b/compiler/luci/pass/src/FoldSparseToDensePass.cpp
index 0c6fc43ed..ed60d8899 100644
--- a/compiler/luci/pass/src/FoldSparseToDensePass.cpp
+++ b/compiler/luci/pass/src/FoldSparseToDensePass.cpp
@@ -19,6 +19,8 @@
#include <luci/IR/CircleNodes.h>
+#include <limits>
+
namespace
{
diff --git a/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.cpp b/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.cpp
index 2c990f0a5..bc09abee2 100644
--- a/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.cpp
+++ b/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.cpp
@@ -22,6 +22,7 @@
#include <luci/Profile/CircleNodeOrigin.h>
#include <luci/Service/CircleShapeInference.h>
#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Service/CircleNodeClone.h>
namespace
{
@@ -55,6 +56,26 @@ void copy_shape(luci::CircleReshape *reshape, luci::CircleReshape *new_reshape)
new_reshape->newShape()->dim(r) = reshape->newShape()->dim(r);
}
+luci::CircleReshape *create_cloned_reshape(luci::CircleReshape *reshape)
+{
+ assert(reshape != nullptr); // FIX_CALLER_UNLESS
+
+ luci::CircleConst *cloned_shape = clone_shape(reshape);
+ if (cloned_shape == nullptr)
+ return nullptr;
+
+ auto cloned_node = luci::clone_node(reshape, reshape->graph());
+ if (cloned_node == nullptr)
+ return nullptr;
+
+ auto new_reshape = loco::must_cast<luci::CircleReshape *>(cloned_node);
+ new_reshape->shape(cloned_shape);
+ new_reshape->name(reshape->name() + "_C");
+ luci::add_origin(new_reshape, luci::get_origin(reshape));
+
+ return new_reshape;
+}
+
bool forward_reshape(luci::CircleReshape *reshape, luci::CircleNeg *neg)
{
assert(reshape != nullptr);
@@ -85,6 +106,26 @@ bool forward_reshape(luci::CircleReshape *reshape, luci::CircleNeg *neg)
return true;
}
+bool forward_reshape(luci::CircleReshape *reshape, luci::CircleLogistic *logit)
+{
+ assert(reshape != nullptr); // FIX_CALLER_UNLESS
+ assert(logit != nullptr); // FIX_CALLER_UNLESS
+
+ auto new_reshape = create_cloned_reshape(reshape);
+ if (not new_reshape)
+ return false;
+
+ // reconnect network
+ loco::replace(logit).with(new_reshape);
+ logit->x(reshape->tensor());
+ new_reshape->tensor(logit);
+
+ // Do shape inference for this node again.
+ logit->shape_status(luci::ShapeStatus::UNDEFINED);
+
+ return true;
+}
+
class ForwardReshape final : public luci::CircleNodeMutableVisitor<bool>
{
protected:
@@ -103,6 +144,14 @@ protected:
return forward_reshape(reshape, node);
}
+ bool visit(luci::CircleLogistic *node)
+ {
+ auto reshape = as_reshape(node->x());
+ if (reshape == nullptr)
+ return false;
+
+ return forward_reshape(reshape, node);
+ }
// TODO add more unary operators
};
diff --git a/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.test.cpp b/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.test.cpp
index 2593a014c..373513270 100644
--- a/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.test.cpp
+++ b/compiler/luci/pass/src/ForwardReshapeToUnaryOpPass.test.cpp
@@ -65,6 +65,42 @@ protected:
luci::CircleConst *_reshape_shape = nullptr;
};
+// TODO Reduce duplicate code with ReshapeNegGraphlet
+class ReshapeLogisticGraphlet
+{
+public:
+ ReshapeLogisticGraphlet() = default;
+
+public:
+ void init(loco::Graph *g, const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ std::vector<uint32_t> shape_out_v = shape_out;
+
+ _reshape_shape = g->nodes()->create<luci::CircleConst>();
+ _reshape = g->nodes()->create<luci::CircleReshape>();
+ _logistic = g->nodes()->create<luci::CircleLogistic>();
+
+ _reshape_shape->dtype(loco::DataType::S32);
+ _reshape_shape->rank(1);
+ _reshape_shape->dim(0).set(shape_out_v.size());
+ _reshape_shape->shape_status(luci::ShapeStatus::VALID);
+ // values
+ const auto size = shape_out_v.size();
+ _reshape_shape->size<loco::DataType::S32>(size);
+ for (uint32_t i = 0; i < size; i++)
+ _reshape_shape->at<loco::DataType::S32>(i) = shape_out_v[i];
+
+ _reshape_shape->name("reshape_shape");
+ _reshape->name("reshape");
+ _logistic->name("logistic");
+ }
+
+protected:
+ luci::CircleReshape *_reshape = nullptr;
+ luci::CircleLogistic *_logistic = nullptr;
+ luci::CircleConst *_reshape_shape = nullptr;
+};
+
class ForwardReshapeToNegGraph : public TestIOGraph, public ReshapeNegGraphlet
{
public:
@@ -85,6 +121,26 @@ public:
}
};
+class ForwardReshapeToLogisticGraph : public TestIOGraph, public ReshapeLogisticGraphlet
+{
+public:
+ ForwardReshapeToLogisticGraph() = default;
+
+public:
+ void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+ {
+ TestIOGraph::init(shape_in, shape_out);
+ ReshapeLogisticGraphlet::init(g(), shape_in, shape_out);
+
+ // connect network
+ _reshape->tensor(input());
+ _reshape->shape(_reshape_shape);
+ _logistic->x(_reshape);
+
+ output()->from(_logistic);
+ }
+};
+
class ForwardReshapeToNegGraphTest : public ::testing::Test
{
public:
@@ -101,6 +157,22 @@ protected:
luci::ForwardReshapeToUnaryOpPass _pass;
};
+class ForwardReshapeToLogisticGraphTest : public ::testing::Test
+{
+public:
+ ForwardReshapeToLogisticGraphTest() = default;
+
+ void run_pass(void)
+ {
+ while (_pass.run(_graph.g()))
+ ;
+ }
+
+protected:
+ ForwardReshapeToLogisticGraph _graph;
+ luci::ForwardReshapeToUnaryOpPass _pass;
+};
+
} // namespace
TEST(ForwardReshapeToUnaryOpPassTest, name)
@@ -123,3 +195,17 @@ TEST_F(ForwardReshapeToNegGraphTest, simple_forward)
neg = dynamic_cast<luci::CircleNeg *>(reshape->tensor());
ASSERT_NE(nullptr, neg);
}
+
+TEST_F(ForwardReshapeToLogisticGraphTest, forward)
+{
+ _graph.init({2, 2, 2}, {2, 4});
+
+ run_pass();
+
+ auto reshape = dynamic_cast<luci::CircleReshape *>(_graph.output()->from());
+ auto log = dynamic_cast<luci::CircleLogistic *>(_graph.output()->from());
+ ASSERT_NE(nullptr, reshape);
+ ASSERT_EQ(nullptr, log);
+ log = dynamic_cast<luci::CircleLogistic *>(reshape->tensor());
+ ASSERT_NE(nullptr, log);
+}
diff --git a/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp b/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp
index 97a962cb6..3cf31ed10 100644
--- a/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp
+++ b/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp
@@ -99,6 +99,12 @@ bool fuse_add_with_fc(luci::CircleFullyConnected *fc)
fused_bias->at<loco::DataType::FLOAT32>(i) += const_bias->at<loco::DataType::FLOAT32>(i);
}
+ // At this point, it is guaranteed that fused_bias's shape is [1, 1, ..., N] or [N]
+ // where N is weights->dim(0).
+ // The shape is normalized to [N] to become the bias of FC
+ fused_bias->rank(1);
+ fused_bias->dim(0) = weights->dim(0);
+
fc->bias(fused_bias);
fc->fusedActivationFunction(add->fusedActivationFunction());
diff --git a/compiler/luci/pass/src/FuseAddWithTConvPass.cpp b/compiler/luci/pass/src/FuseAddWithTConvPass.cpp
index 2bca57014..852bc8b63 100644
--- a/compiler/luci/pass/src/FuseAddWithTConvPass.cpp
+++ b/compiler/luci/pass/src/FuseAddWithTConvPass.cpp
@@ -37,10 +37,10 @@ namespace
* \ |
* [CircleTransposeConv] [CircleAdd]
* |
- * ([CircleRelu6])
+ * ([CircleRelu/Relu6])
* |
*
- * Note: CircleRelu6 is inserted if Add activation is ReLU6
+ * Note: CircleRelu/Relu6 is inserted if Add activation is ReLU/ReLU6
*/
bool fuse_add_with_tconv(luci::CircleTransposeConv *tconv)
{
@@ -65,7 +65,8 @@ bool fuse_add_with_tconv(luci::CircleTransposeConv *tconv)
if (add->dtype() != loco::DataType::FLOAT32)
return false;
if (add->fusedActivationFunction() != luci::FusedActFunc::NONE &&
- add->fusedActivationFunction() != luci::FusedActFunc::RELU6)
+ add->fusedActivationFunction() != luci::FusedActFunc::RELU6 &&
+ add->fusedActivationFunction() != luci::FusedActFunc::RELU)
return false;
// get addition
@@ -102,6 +103,19 @@ bool fuse_add_with_tconv(luci::CircleTransposeConv *tconv)
// remove add node
replace(add).with(relu);
}
+ else if (add->fusedActivationFunction() == luci::FusedActFunc::RELU)
+ {
+ auto name = addition->name();
+ assert(name.length() > 0);
+ // separate relu op from add op
+ auto relu = add->graph()->nodes()->create<luci::CircleRelu>();
+ relu->features(tconv);
+ relu->name(name + "/Relu");
+ luci::add_origin(relu, luci::get_origin(add));
+
+ // remove add node
+ replace(add).with(relu);
+ }
else
{
replace(add).with(tconv);
diff --git a/compiler/luci/pass/src/FuseBatchNormWithTConvPass.cpp b/compiler/luci/pass/src/FuseBatchNormWithTConvPass.cpp
index 337954960..e6b54df36 100644
--- a/compiler/luci/pass/src/FuseBatchNormWithTConvPass.cpp
+++ b/compiler/luci/pass/src/FuseBatchNormWithTConvPass.cpp
@@ -29,7 +29,7 @@ namespace
* NOTE TF's BatchNormalization is converted to Mul and Add.
*
* BEFORE
- * | [CircleOutputExclude]
+ * | [CircleConst]/[CircleOutputExclude]
* | / [CircleConst]
* | / /
* [CircleTransposeConv] [CircleConst]
@@ -40,7 +40,7 @@ namespace
* |
*
* AFTER
- * | [CircleOutputExclude]
+ * | [CircleConst]/[CircleOutputExclude]
* +-------------------------------------+ / [CircleConst]
* | | / /
* | [CircleTransposeConv] [CircleConst]
@@ -69,9 +69,10 @@ bool fused_batch_norm_with_tconv(luci::CircleAdd *add)
return false;
// check scale and shift constant attributes
- if (scale->rank() != 1)
+ // TODO maybe rank check is not needed
+ if (scale->rank() != 1 && scale->rank() != 4)
return false;
- if (shift->rank() != 1)
+ if (shift->rank() != 1 && shift->rank() != 4)
return false;
// check mul, add attributes
if (mul->dtype() != loco::DataType::FLOAT32)
@@ -82,9 +83,8 @@ bool fused_batch_norm_with_tconv(luci::CircleAdd *add)
add->fusedActivationFunction() != luci::FusedActFunc::RELU6)
return false;
- // tconv bias should be not set
- if (not dynamic_cast<luci::CircleOutputExclude *>(tconv->bias()))
- return false;
+ // tconv bias is optional
+ auto bias = dynamic_cast<luci::CircleConst *>(tconv->bias());
// get weight of tconv
auto filter = dynamic_cast<luci::CircleConst *>(tconv->filter());
@@ -96,10 +96,36 @@ bool fused_batch_norm_with_tconv(luci::CircleAdd *add)
return false;
auto filter_out_chn = filter->dim(0).value();
- if (filter_out_chn != scale->dim(0).value())
+ // allow scale/shift and bias shape of [N], [1,1,1,N]; BN works for "channel-wise"
+ auto srank = scale->rank() - 1;
+ if (filter_out_chn != scale->dim(srank).value())
return false;
- if (filter_out_chn != shift->dim(0).value())
+ for (uint32_t d = 0; d < srank; ++d)
+ {
+ if (1 != scale->dim(d).value())
+ return false;
+ }
+ srank = shift->rank() - 1;
+ if (filter_out_chn != shift->dim(srank).value())
return false;
+ for (uint32_t d = 0; d < srank; ++d)
+ {
+ if (1 != shift->dim(d).value())
+ return false;
+ }
+ if (bias)
+ {
+ if (bias->dtype() != loco::DataType::FLOAT32)
+ return false;
+ srank = bias->rank() - 1;
+ if (filter_out_chn != bias->dim(srank).value())
+ return false;
+ for (uint32_t d = 0; d < srank; ++d)
+ {
+ if (1 != bias->dim(d).value())
+ return false;
+ }
+ }
auto name = add->name();
assert(name.length() > 0);
@@ -151,6 +177,11 @@ bool fused_batch_norm_with_tconv(luci::CircleAdd *add)
for (uint32_t c = 0; c < filter_out_chn; ++c)
{
fused_bias->at<loco::DataType::FLOAT32>(c) = shift->at<loco::DataType::FLOAT32>(c);
+ if (bias != nullptr)
+ {
+ fused_bias->at<loco::DataType::FLOAT32>(c) +=
+ bias->at<loco::DataType::FLOAT32>(c) * scale->at<loco::DataType::FLOAT32>(c);
+ }
}
fused_bias->name(name + "/TransposeConv/bias");
@@ -166,6 +197,10 @@ bool fused_batch_norm_with_tconv(luci::CircleAdd *add)
luci::add_origin(fused_tconv,
luci::composite_origin(
{luci::get_origin(add), luci::get_origin(mul), luci::get_origin(tconv)}));
+ if (bias != nullptr)
+ {
+ luci::add_origin(fused_tconv, luci::get_origin(bias));
+ }
if (add->fusedActivationFunction() == luci::FusedActFunc::RELU6)
{
diff --git a/compiler/luci/pass/src/FuseInstanceNormPass.cpp b/compiler/luci/pass/src/FuseInstanceNormPass.cpp
index f3ec6cd9e..10a651e35 100644
--- a/compiler/luci/pass/src/FuseInstanceNormPass.cpp
+++ b/compiler/luci/pass/src/FuseInstanceNormPass.cpp
@@ -325,6 +325,10 @@ public:
}
private:
+ bool condition_common_1_5(uint32_t ifm_channel_depth);
+ bool condition_common_3_4();
+
+private:
template <enum PatternVersion> bool match();
public:
@@ -368,21 +372,8 @@ private:
if (not(condition)) \
return false;
-template <> bool InstanceNormPattern::match<InstanceNormPattern::PatternVersion::Version_1>()
+bool InstanceNormPattern::condition_common_1_5(uint32_t ifm_channel_depth)
{
- CHECK_OR_FALSE(luci::fill(&mul_as_scaled_ifm, &sub).with_commutative_args_of(add_as_terminal));
- CHECK_OR_FALSE(luci::fill(&ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_ifm));
-
- auto ifm_circle = loco::must_cast<luci::CircleNode *>(ifm);
- CHECK_OR_FALSE(ifm_circle->shape_status() == luci::ShapeStatus::VALID);
- CHECK_OR_FALSE(ifm_circle->rank() == 4);
- CHECK_OR_FALSE(ifm_circle->dim(3).known());
- uint32_t ifm_channel_depth = ifm_circle->dim(3).value();
-
- CHECK_OR_FALSE(luci::fill(&rsqrt, &const_as_gamma).with_commutative_args_of(mul_gamma));
-
- CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_gamma, ifm_channel_depth));
-
add_as_variance = dynamic_cast<luci::CircleAdd *>(rsqrt->x());
CHECK_OR_FALSE(add_as_variance);
@@ -408,6 +399,70 @@ template <> bool InstanceNormPattern::match<InstanceNormPattern::PatternVersion:
CHECK_OR_FALSE(const_as_beta);
CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_beta, ifm_channel_depth));
+ return true;
+}
+
+bool InstanceNormPattern::condition_common_3_4()
+{
+ // check left sub
+ ifm = sub->x();
+ CHECK_OR_FALSE(ifm);
+
+ luci::CircleNode *ifm_node = loco::must_cast<luci::CircleNode *>(ifm);
+ CHECK_OR_FALSE(ifm_node->rank() == 4);
+ CHECK_OR_FALSE(ifm_node->dim(3).known());
+
+ mean_of_ifm = dynamic_cast<luci::CircleMean *>(sub->y());
+ CHECK_OR_FALSE(mean_of_ifm);
+ CHECK_OR_FALSE(ifm == mean_of_ifm->input());
+
+ // continue search from add_as_variance
+ CHECK_OR_FALSE(luci::fill(&sqrt, &const_as_epsilon).with_commutative_args_of(add_as_variance));
+ CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32);
+ // TODO Support regarding broadcast
+ CHECK_OR_FALSE(const_as_epsilon->size<loco::DataType::FLOAT32>() == 1);
+
+ mean_as_variance = dynamic_cast<luci::CircleMean *>(sqrt->x());
+ CHECK_OR_FALSE(mean_as_variance);
+
+ square = dynamic_cast<luci::CircleSquare *>(mean_as_variance->input());
+ CHECK_OR_FALSE(square);
+
+ sub_2 = dynamic_cast<luci::CircleSub *>(square->x());
+ CHECK_OR_FALSE(sub_2);
+ CHECK_OR_FALSE(ifm == sub_2->x());
+
+ mean_of_ifm_2 = dynamic_cast<luci::CircleMean *>(sub_2->y());
+ CHECK_OR_FALSE(mean_of_ifm_2);
+ CHECK_OR_FALSE(ifm == mean_of_ifm_2->input());
+
+ loco::Node *ifm_should_be = nullptr;
+ luci::CircleMean *mean_of_ifm_2_should_be = nullptr;
+ CHECK_OR_FALSE(
+ luci::fill(&ifm_should_be, &mean_of_ifm_2_should_be).with_commutative_args_of(sub_2));
+ CHECK_OR_FALSE(ifm == ifm_should_be);
+ CHECK_OR_FALSE(mean_of_ifm_2 == mean_of_ifm_2_should_be);
+
+ return true;
+}
+
+template <> bool InstanceNormPattern::match<InstanceNormPattern::PatternVersion::Version_1>()
+{
+ CHECK_OR_FALSE(luci::fill(&mul_as_scaled_ifm, &sub).with_commutative_args_of(add_as_terminal));
+ CHECK_OR_FALSE(luci::fill(&ifm, &mul_gamma).with_commutative_args_of(mul_as_scaled_ifm));
+
+ auto ifm_circle = loco::must_cast<luci::CircleNode *>(ifm);
+ CHECK_OR_FALSE(ifm_circle->shape_status() == luci::ShapeStatus::VALID);
+ CHECK_OR_FALSE(ifm_circle->rank() == 4);
+ CHECK_OR_FALSE(ifm_circle->dim(3).known());
+ uint32_t ifm_channel_depth = ifm_circle->dim(3).value();
+
+ CHECK_OR_FALSE(luci::fill(&rsqrt, &const_as_gamma).with_commutative_args_of(mul_gamma));
+
+ CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_gamma, ifm_channel_depth));
+
+ CHECK_OR_FALSE(condition_common_1_5(ifm_channel_depth));
+
luci::CircleMul *mul_gamma_should_be = nullptr;
luci::CircleMean *mean_of_ifm_should_be = nullptr;
@@ -488,44 +543,7 @@ template <> bool InstanceNormPattern::match<InstanceNormPattern::PatternVersion:
CHECK_OR_FALSE(luci::fill(&div, &const_as_gamma).with_commutative_args_of(mul_gamma));
CHECK_OR_FALSE(luci::fill(&sub, &add_as_variance).with_commutative_args_of(div));
- // check left sub
- ifm = sub->x();
- CHECK_OR_FALSE(ifm);
-
- luci::CircleNode *ifm_node = loco::must_cast<luci::CircleNode *>(ifm);
- CHECK_OR_FALSE(ifm_node->rank() == 4);
- CHECK_OR_FALSE(ifm_node->dim(3).known());
-
- mean_of_ifm = dynamic_cast<luci::CircleMean *>(sub->y());
- CHECK_OR_FALSE(mean_of_ifm);
- CHECK_OR_FALSE(ifm == mean_of_ifm->input());
-
- // continue search from add_as_variance
- CHECK_OR_FALSE(luci::fill(&sqrt, &const_as_epsilon).with_commutative_args_of(add_as_variance));
- CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32);
- // TODO Support regarding broadcast
- CHECK_OR_FALSE(const_as_epsilon->size<loco::DataType::FLOAT32>() == 1);
-
- mean_as_variance = dynamic_cast<luci::CircleMean *>(sqrt->x());
- CHECK_OR_FALSE(mean_as_variance);
-
- square = dynamic_cast<luci::CircleSquare *>(mean_as_variance->input());
- CHECK_OR_FALSE(square);
-
- sub_2 = dynamic_cast<luci::CircleSub *>(square->x());
- CHECK_OR_FALSE(sub_2);
- CHECK_OR_FALSE(ifm == sub_2->x());
-
- mean_of_ifm_2 = dynamic_cast<luci::CircleMean *>(sub_2->y());
- CHECK_OR_FALSE(mean_of_ifm_2);
- CHECK_OR_FALSE(ifm == mean_of_ifm_2->input());
-
- loco::Node *ifm_should_be = nullptr;
- luci::CircleMean *mean_of_ifm_2_should_be = nullptr;
- CHECK_OR_FALSE(
- luci::fill(&ifm_should_be, &mean_of_ifm_2_should_be).with_commutative_args_of(sub_2));
- CHECK_OR_FALSE(ifm == ifm_should_be);
- CHECK_OR_FALSE(mean_of_ifm_2 == mean_of_ifm_2_should_be);
+ CHECK_OR_FALSE(condition_common_3_4());
_matched = true;
return true;
@@ -546,44 +564,7 @@ template <> bool InstanceNormPattern::match<InstanceNormPattern::PatternVersion:
CHECK_OR_FALSE(div);
CHECK_OR_FALSE(luci::fill(&sub, &add_as_variance).with_commutative_args_of(div));
- // check left sub
- ifm = sub->x();
- CHECK_OR_FALSE(ifm);
-
- luci::CircleNode *ifm_node = loco::must_cast<luci::CircleNode *>(ifm);
- CHECK_OR_FALSE(ifm_node->rank() == 4);
- CHECK_OR_FALSE(ifm_node->dim(3).known());
-
- mean_of_ifm = dynamic_cast<luci::CircleMean *>(sub->y());
- CHECK_OR_FALSE(mean_of_ifm);
- CHECK_OR_FALSE(ifm == mean_of_ifm->input());
-
- // continue search from add_as_variance
- CHECK_OR_FALSE(luci::fill(&sqrt, &const_as_epsilon).with_commutative_args_of(add_as_variance));
- CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32);
- // TODO Support regarding broadcast
- CHECK_OR_FALSE(const_as_epsilon->size<loco::DataType::FLOAT32>() == 1);
-
- mean_as_variance = dynamic_cast<luci::CircleMean *>(sqrt->x());
- CHECK_OR_FALSE(mean_as_variance);
-
- square = dynamic_cast<luci::CircleSquare *>(mean_as_variance->input());
- CHECK_OR_FALSE(square);
-
- sub_2 = dynamic_cast<luci::CircleSub *>(square->x());
- CHECK_OR_FALSE(sub_2);
- CHECK_OR_FALSE(ifm == sub_2->x());
-
- mean_of_ifm_2 = dynamic_cast<luci::CircleMean *>(sub_2->y());
- CHECK_OR_FALSE(mean_of_ifm_2);
- CHECK_OR_FALSE(ifm == mean_of_ifm_2->input());
-
- loco::Node *ifm_should_be = nullptr;
- luci::CircleMean *mean_of_ifm_2_should_be = nullptr;
- CHECK_OR_FALSE(
- luci::fill(&ifm_should_be, &mean_of_ifm_2_should_be).with_commutative_args_of(sub_2));
- CHECK_OR_FALSE(ifm == ifm_should_be);
- CHECK_OR_FALSE(mean_of_ifm_2 == mean_of_ifm_2_should_be);
+ CHECK_OR_FALSE(condition_common_3_4());
assert(const_as_gamma == nullptr);
assert(const_as_beta == nullptr);
@@ -612,30 +593,7 @@ template <> bool InstanceNormPattern::match<InstanceNormPattern::PatternVersion:
CHECK_OR_FALSE(ifm_circle->dim(3).known());
uint32_t ifm_channel_depth = ifm_circle->dim(3).value();
- add_as_variance = dynamic_cast<luci::CircleAdd *>(rsqrt->x());
- CHECK_OR_FALSE(add_as_variance);
-
- CHECK_OR_FALSE(
- luci::fill(&mean_as_variance, &const_as_epsilon).with_commutative_args_of(add_as_variance));
-
- CHECK_OR_FALSE(const_as_epsilon->dtype() == loco::DataType::FLOAT32);
- // TODO Support regarding broadcast
- CHECK_OR_FALSE(const_as_epsilon->size<loco::DataType::FLOAT32>() == 1);
-
- CHECK_OR_FALSE(is_instance_mean_v1(mean_as_variance));
-
- sqdiff = dynamic_cast<luci::CircleSquaredDifference *>(mean_as_variance->input());
- CHECK_OR_FALSE(sqdiff);
-
- loco::Node *ifm_should_be = nullptr;
- CHECK_OR_FALSE(luci::fill(&ifm_should_be, &mean_of_ifm).with_commutative_args_of(sqdiff));
- CHECK_OR_FALSE(ifm == ifm_should_be);
- CHECK_OR_FALSE(is_instance_mean_v1(mean_of_ifm));
- CHECK_OR_FALSE(ifm == mean_of_ifm->input());
-
- const_as_beta = dynamic_cast<luci::CircleConst *>(sub->x());
- CHECK_OR_FALSE(const_as_beta);
- CHECK_OR_FALSE(is_1D_with_dummy_dim(const_as_beta, ifm_channel_depth));
+ CHECK_OR_FALSE(condition_common_1_5(ifm_channel_depth));
luci::CircleRsqrt *rsqrt_should_be = nullptr;
luci::CircleMean *mean_of_ifm_should_be = nullptr;
diff --git a/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp b/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp
index b4975486d..e8fa2a478 100644
--- a/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp
+++ b/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp
@@ -23,6 +23,7 @@
#include <luci/Log.h>
#include <cmath>
+#include <limits>
namespace
{
diff --git a/compiler/luci/pass/src/PropagateQParamForwardPass.cpp b/compiler/luci/pass/src/PropagateQParamForwardPass.cpp
index 003e4c293..aaadb2864 100644
--- a/compiler/luci/pass/src/PropagateQParamForwardPass.cpp
+++ b/compiler/luci/pass/src/PropagateQParamForwardPass.cpp
@@ -138,13 +138,18 @@ struct PropagateQParamForward final : public luci::CircleNodeMutableVisitor<bool
auto qtype = luci::activation_qtype(input_node);
switch (qtype)
{
- case luci::ActivationQType::PreDefinedValue:
- node->quantparam(luci::make_predefined_qparam(input_node->opcode(), node->dtype()));
+ case luci::ActivationQType::PreDefinedLogistic:
+ case luci::ActivationQType::PreDefinedTanh:
+ case luci::ActivationQType::PreDefinedSoftmax:
+ node->quantparam(luci::make_predefined_qparam(qtype, node->dtype()));
break;
case luci::ActivationQType::IntScale:
luci::set_int_scale(node);
break;
default:
+ // This assert ensures this switch-statement handles all ActivationQTypes
+ // TODO Find a better design to remove coupling with ActivationQType
+ assert(qtype == luci::ActivationQType::MinMax);
break;
}
diff --git a/compiler/luci/pass/src/QuantizationUtils.cpp b/compiler/luci/pass/src/QuantizationUtils.cpp
index ad86cedf4..06a4ae9f6 100644
--- a/compiler/luci/pass/src/QuantizationUtils.cpp
+++ b/compiler/luci/pass/src/QuantizationUtils.cpp
@@ -20,6 +20,7 @@
#include <iostream>
#include <cmath>
+#include <limits>
namespace luci
{
@@ -276,31 +277,70 @@ uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices)
indices[2] * dimension.dim(3).value() + indices[3];
}
+// Activation (ofm) qtype is determined in different ways.
+// 1. Pre-defined values: Some Ops have pre-defined qparams (ex: LOGISTIC, TANH)
+// 2. Integer scale: Output of some Ops should be integers (ex: FLOOR, CEIL)
+// 3. Activation qtype of input: Some Ops propagate qparam from input to output (ex: QUANTIZE,
+// TRANSPOSE, etc. See PropagateQParamForwardPass.cpp for more details).
ActivationQType activation_qtype(const CircleNode *node)
{
auto fused_act_node = dynamic_cast<const CircleNodeMixin<CircleNodeTrait::FusedActFunc> *>(node);
if (fused_act_node && fused_act_node->fusedActivationFunction() == FusedActFunc::TANH)
- return ActivationQType::PreDefinedValue;
+ return ActivationQType::PreDefinedTanh;
+
+#define RETURN_INPUT_ACTIVATION_QTYPE(CLASS, INPUT) \
+ { \
+ auto n = loco::must_cast<const CLASS *>(node); \
+ auto input = loco::must_cast<CircleNode *>(n->INPUT()); \
+ return activation_qtype(input); \
+ }
switch (node->opcode())
{
case CircleOpcode::LOGISTIC:
+ return ActivationQType::PreDefinedLogistic;
case CircleOpcode::TANH:
+ return ActivationQType::PreDefinedTanh;
case CircleOpcode::SOFTMAX:
- return ActivationQType::PreDefinedValue;
+ return ActivationQType::PreDefinedSoftmax;
case CircleOpcode::FLOOR:
case CircleOpcode::FLOOR_DIV:
case CircleOpcode::FLOOR_MOD:
case CircleOpcode::CEIL:
return ActivationQType::IntScale;
+ case CircleOpcode::GATHER:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleGather, params);
+ case CircleOpcode::RESHAPE:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleReshape, tensor);
+ case CircleOpcode::TRANSPOSE:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleTranspose, a);
+ case CircleOpcode::STRIDED_SLICE:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleStridedSlice, input);
+ case CircleOpcode::SPLIT:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleSplit, input);
+ case CircleOpcode::CIRCLESPLITOUT:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleSplitOut, input);
+ case CircleOpcode::SPLIT_V:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleSplitV, input);
+ case CircleOpcode::CIRCLESPLITVOUT:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleSplitVOut, input);
+ case CircleOpcode::UNPACK:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleUnpack, value);
+ case CircleOpcode::CIRCLEUNPACKOUT:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleUnpackOut, input);
+ case CircleOpcode::QUANTIZE:
+ RETURN_INPUT_ACTIVATION_QTYPE(CircleQuantize, input);
default:
break;
}
+#undef RETURN_INPUT_ACTIVATION_QTYPE
+
return ActivationQType::MinMax;
}
-std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, loco::DataType dtype)
+std::unique_ptr<CircleQuantParam> make_predefined_qparam(ActivationQType qtype,
+ loco::DataType dtype)
{
auto qparam = std::make_unique<CircleQuantParam>();
@@ -309,9 +349,9 @@ std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, lo
qparam->zerop.emplace_back(zp);
};
- switch (opcode)
+ switch (qtype)
{
- case CircleOpcode::LOGISTIC:
+ case ActivationQType::PreDefinedLogistic:
if (dtype == loco::DataType::U8)
set_qparam(1.0f / 256.0f, 0);
else
@@ -320,7 +360,7 @@ std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, lo
set_qparam(1.0f / 32768.0f, 0);
}
break;
- case CircleOpcode::TANH:
+ case ActivationQType::PreDefinedTanh:
if (dtype == loco::DataType::U8)
set_qparam(2.0f / 256.0f, 128);
else
@@ -329,7 +369,7 @@ std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, lo
set_qparam(1.0f / 32768.0f, 0);
}
break;
- case CircleOpcode::SOFTMAX:
+ case ActivationQType::PreDefinedSoftmax:
if (dtype == loco::DataType::U8)
set_qparam(1.0f / 255.0f, 0);
else
@@ -341,7 +381,7 @@ std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, lo
default:
throw std::runtime_error("Unsupported opcode with pre-defined qparam");
}
- return std::move(qparam);
+ return qparam;
}
// For nodes with integer output, we use integer scale
@@ -395,4 +435,74 @@ void quant_const(luci::CircleConst *node, loco::DataType quant_type)
node->quantparam(std::move(quantparam));
}
+namespace
+{
+
+// TODO move this to a more global helper file
+int nbits(loco::DataType dt) noexcept
+{
+ switch (dt)
+ {
+ case loco::DataType::S8:
+ case loco::DataType::U8:
+ return 8;
+ case loco::DataType::S16:
+ case loco::DataType::U16:
+ case loco::DataType::FLOAT16:
+ return 16;
+ case loco::DataType::S32:
+ case loco::DataType::U32:
+ case loco::DataType::FLOAT32:
+ return 32;
+ case loco::DataType::S64:
+ return 64;
+ default:
+ return 64; // a safe large default
+ }
+}
+
+// TODO Check if the metric is valid
+// Returns true if [min,max] is poorly representable
+bool range_check(float min, float max, loco::DataType dtype)
+{
+ float thresh = 1.5f;
+ return log2f(max) - log2f(min) > nbits(dtype) * thresh;
+}
+
+bool warn_scale_zp(float scale, int64_t zp, luci::CircleNode *n)
+{
+ float min, max;
+ // estimate min/max
+ switch (n->dtype())
+ {
+ case loco::DataType::U8:
+ min = scale * (0 - zp);
+ max = scale * (255 - zp);
+ break;
+ case loco::DataType::S16:
+ min = scale * (-32767);
+ max = scale * (32767);
+ break;
+ default:
+ return false;
+ }
+ return range_check(min, max, n->dtype());
+}
+
+} // namespace
+
+void warn_accuracy_with_range(luci::CircleNode *n)
+{
+ LOGGER(l);
+ auto qp = n->quantparam();
+ auto k = qp->zerop.size();
+ for (uint32_t i = 0; i < k; i++)
+ {
+ if (warn_scale_zp(qp->scale[i], qp->zerop[i], n))
+ WARN(l) << "Quantization of " << i << "-th channel of " << n->name()
+ << "'s quantization may cause accuracy issues" << std::endl;
+ ;
+ }
+}
+
} // namespace luci
diff --git a/compiler/luci/pass/src/QuantizationUtils.h b/compiler/luci/pass/src/QuantizationUtils.h
index cd8cec95a..4d5316ccb 100644
--- a/compiler/luci/pass/src/QuantizationUtils.h
+++ b/compiler/luci/pass/src/QuantizationUtils.h
@@ -62,15 +62,19 @@ bool is_quantized(const CircleNode *node);
enum ActivationQType
{
- MinMax, // Quantize using recorded min/max
- PreDefinedValue, // Quantize using pre-defined values
- IntScale, // Round scale to a positive integer
+ MinMax, // Quantize using recorded min/max
+ PreDefinedLogistic, // Quantize using pre-defined values
+ PreDefinedTanh, // Quantize using pre-defined values
+ PreDefinedSoftmax, // Quantize using pre-defined values
+ IntScale, // Round scale to a positive integer
};
ActivationQType activation_qtype(const CircleNode *node);
// Create qparam with pre-defined values for speical operators
-std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, loco::DataType dtype);
+std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleNode *node, loco::DataType dtype);
+std::unique_ptr<CircleQuantParam> make_predefined_qparam(ActivationQType qtype,
+ loco::DataType dtype);
// Update node's scale to a positive integer (for special Ops e.g., Floor, Ceil)
void set_int_scale(luci::CircleNode *node);
@@ -78,6 +82,10 @@ void set_int_scale(luci::CircleNode *node);
// Quantize const tensor using its min/max values
void quant_const(luci::CircleConst *node, loco::DataType quant_type);
+// Check that a node is quantized without significant loss of precision;
+// Emits warnings to log with WARN
+void warn_accuracy_with_range(luci::CircleNode *n);
+
} // namespace luci
#endif // __LUCI_QUANTIZATION_UTILS_H__
diff --git a/compiler/luci/pass/src/QuantizeActivation.cpp b/compiler/luci/pass/src/QuantizeActivation.cpp
index 149331824..95251a82c 100644
--- a/compiler/luci/pass/src/QuantizeActivation.cpp
+++ b/compiler/luci/pass/src/QuantizeActivation.cpp
@@ -114,29 +114,26 @@ void QuantizeSpecialActivation::visit(luci::CircleNode *node)
auto fused_act_node = dynamic_cast<CircleNodeMixin<CircleNodeTrait::FusedActFunc> *>(node);
if (fused_act_node != nullptr && fused_act_node->fusedActivationFunction() == FusedActFunc::TANH)
{
- auto qparam = make_predefined_qparam(luci::CircleOpcode::TANH, output_type);
+ auto qparam = make_predefined_qparam(luci::ActivationQType::PreDefinedTanh, output_type);
node->quantparam(std::move(qparam));
}
}
void QuantizeSpecialActivation::visit(luci::CircleLogistic *node)
{
- assert(activation_qtype(node) == luci::ActivationQType::PreDefinedValue);
- auto qparam = make_predefined_qparam(luci::CircleOpcode::LOGISTIC, output_type);
+ auto qparam = make_predefined_qparam(luci::ActivationQType::PreDefinedLogistic, output_type);
node->quantparam(std::move(qparam));
}
void QuantizeSpecialActivation::visit(luci::CircleTanh *node)
{
- assert(activation_qtype(node) == luci::ActivationQType::PreDefinedValue);
- auto qparam = make_predefined_qparam(luci::CircleOpcode::TANH, output_type);
+ auto qparam = make_predefined_qparam(luci::ActivationQType::PreDefinedTanh, output_type);
node->quantparam(std::move(qparam));
}
void QuantizeSpecialActivation::visit(luci::CircleSoftmax *node)
{
- assert(activation_qtype(node) == luci::ActivationQType::PreDefinedValue);
- auto qparam = make_predefined_qparam(luci::CircleOpcode::SOFTMAX, output_type);
+ auto qparam = make_predefined_qparam(luci::ActivationQType::PreDefinedSoftmax, output_type);
node->quantparam(std::move(qparam));
}
diff --git a/compiler/luci/pass/src/QuantizeBias.cpp b/compiler/luci/pass/src/QuantizeBias.cpp
index aa496232a..de97a14dd 100644
--- a/compiler/luci/pass/src/QuantizeBias.cpp
+++ b/compiler/luci/pass/src/QuantizeBias.cpp
@@ -22,6 +22,7 @@
#include <algorithm>
#include <cmath>
+#include <limits>
using namespace luci;
@@ -201,6 +202,18 @@ CircleConst *QuantizeBias::quantized_bias(CircleNode *input, const CircleNode *w
std::vector<float> scaling_factor(size);
std::vector<int64_t> zp(size);
+ if (const_bias->rank() == 0)
+ {
+ // TODO Support quantization of scalar bias
+ throw std::runtime_error("Quantization of scalar bias is not yet supported (" +
+ const_bias->name() + ")");
+ }
+ if (size != const_bias->dim(const_bias->rank() - 1).value())
+ {
+ throw std::runtime_error(const_bias->name() +
+ " (bias) should have the shape of [1, 1, .. 1, channel]");
+ }
+
if (output_type == loco::DataType::U8)
{
new_bias = quant_bias_per_channel(const_bias, input_scale, weight_scale, scaling_factor, zp);
@@ -218,6 +231,7 @@ CircleConst *QuantizeBias::quantized_bias(CircleNode *input, const CircleNode *w
auto quantparam = std::make_unique<CircleQuantParam>();
quantparam->scale = scaling_factor;
quantparam->zerop = zp;
+ quantparam->quantized_dimension = const_bias->rank() - 1;
assert(new_bias->quantparam() == nullptr); // bias should not be quantized before
new_bias->quantparam(std::move(quantparam));
diff --git a/compiler/luci/pass/src/QuantizeBias.test.cpp b/compiler/luci/pass/src/QuantizeBias.test.cpp
new file mode 100644
index 000000000..0104a191b
--- /dev/null
+++ b/compiler/luci/pass/src/QuantizeBias.test.cpp
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizeBias.h"
+
+#include <luci/test/TestIOGraph.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleQuantParam.h>
+
+#include <gtest/gtest.h>
+
+using namespace luci;
+
+namespace
+{
+
+using namespace luci::test;
+
+// TODO Reduce duplicate code in ResolveCustomOpMatMulPass.cpp
+template <typename T>
+luci::CircleConst *create_const_node(loco::Graph *g, const loco::DataType dtype,
+ const std::vector<uint32_t> &shape, T value)
+{
+ auto node = g->nodes()->create<luci::CircleConst>();
+ node->dtype(dtype);
+ node->rank(shape.size());
+
+ uint32_t size = 1;
+ for (uint32_t i = 0; i < shape.size(); ++i)
+ {
+ node->dim(i) = shape.at(i);
+ size *= shape.at(i);
+ }
+ node->shape_status(luci::ShapeStatus::VALID);
+
+#define INIT_VALUES(DT) \
+ { \
+ node->size<DT>(size); \
+ for (uint32_t i = 0; i < size; ++i) \
+ node->at<DT>(i) = value; \
+ }
+
+ switch (dtype)
+ {
+ case loco::DataType::U8:
+ INIT_VALUES(loco::DataType::U8);
+ break;
+ case loco::DataType::S16:
+ INIT_VALUES(loco::DataType::S16);
+ break;
+ case loco::DataType::S32:
+ INIT_VALUES(loco::DataType::S32);
+ break;
+ case loco::DataType::FLOAT32:
+ INIT_VALUES(loco::DataType::FLOAT32)
+ break;
+ default:
+ INTERNAL_EXN("create_const_node called with unsupported type");
+ break;
+ }
+ return node;
+}
+
+/**
+ * Simple graph for test
+ *
+ * BEFORE
+ *
+ * [IFM] [WEIGHTS] [BIAS(FP32)]
+ * \ | /
+ * [FC]
+ * |
+ * [OFM]
+ *
+ * AFTER
+ *
+ * [IFM] [WEIGHTS] [BIAS(Quantized)]
+ * \ | /
+ * [FC]
+ * |
+ * [OFM]
+ */
+struct Q8FCGraphlet
+{
+public:
+ Q8FCGraphlet() = default;
+ virtual ~Q8FCGraphlet() = default;
+
+ void init(loco::Graph *g, const ShapeU32 out_shape, const ShapeU32 w_shape,
+ const ShapeU32 bias_shape, const float bv)
+ {
+ _fc = g->nodes()->create<luci::CircleFullyConnected>();
+ _fc->input(_x);
+ _x->dtype(loco::DataType::U8);
+ {
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale.push_back(1.0);
+ quantparam->zerop.push_back(0);
+ quantparam->quantized_dimension = 0;
+ _x->quantparam(std::move(quantparam));
+ }
+
+ auto weights = create_const_node<uint8_t>(g, loco::DataType::U8, w_shape, 1.0);
+ auto w_qparam = std::make_unique<CircleQuantParam>();
+ std::vector<float> w_scale(weights->dim(0).value(), 1.0);
+ std::vector<int64_t> w_zp(weights->dim(0).value(), 0);
+ w_qparam->scale = w_scale;
+ w_qparam->zerop = w_zp;
+ w_qparam->quantized_dimension = 0;
+ weights->quantparam(std::move(w_qparam));
+ _fc->weights(weights);
+ _fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _fc->dtype(loco::DataType::U8);
+ _fc->shape(out_shape);
+ auto l = _fc->dim(_fc->rank() - 1).value();
+ _fc->bias(create_const_node(g, loco::DataType::FLOAT32, bias_shape, bv));
+ _fc->name("fc");
+ {
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale.push_back(1.0);
+ quantparam->zerop.push_back(0);
+ quantparam->quantized_dimension = 0;
+ _fc->quantparam(std::move(quantparam));
+ }
+ }
+
+public:
+ luci::CircleFullyConnected *fc() { return _fc; }
+
+protected:
+ luci::CircleFullyConnected *_fc = nullptr;
+ luci::CircleInput *_x = nullptr;
+};
+
+struct Q8FCGraph final : public TestIGraphlet, public TestOGraphlet, public Q8FCGraphlet
+{
+ void init(const ShapeU32 in_shape, const ShapeU32 w_shape, const ShapeU32 out_shape,
+ const ShapeU32 bias_shape, const float bv)
+ {
+ TestIGraphlet::init(g(), in_shape);
+ TestOGraphlet::init(g(), out_shape);
+ _x = input();
+ Q8FCGraphlet::init(g(), out_shape, w_shape, bias_shape, bv);
+ output()->from(_fc);
+ }
+};
+
+class CQ8QuantizeBiasFCTest : public ::testing::Test
+{
+public:
+ Q8FCGraph g;
+ luci::QuantizeBias qb{loco::DataType::FLOAT32, loco::DataType::U8,
+ luci::QuantizationGranularity::ChannelWise};
+};
+
+} // namespace
+
+TEST_F(CQ8QuantizeBiasFCTest, fully_connected)
+{
+ g.init({1, 18, 80}, {256, 80}, {18, 256}, {1, 256}, 1);
+ g.fc()->accept(&qb);
+
+ auto bias = loco::must_cast<CircleConst *>(g.fc()->bias());
+ auto qparam = bias->quantparam();
+
+ EXPECT_NE(nullptr, qparam);
+ EXPECT_EQ(256, qparam->scale.size());
+ EXPECT_EQ(256, qparam->zerop.size());
+ EXPECT_EQ(1, qparam->quantized_dimension);
+}
+
+TEST_F(CQ8QuantizeBiasFCTest, wrong_bias_shape_NEG)
+{
+ g.init({1, 18, 80}, {256, 80}, {18, 256}, {1, 2, 128}, 1);
+ EXPECT_ANY_THROW(g.fc()->accept(&qb)); // Wrong bias shape
+}
diff --git a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp
index c9b35e0be..ef047d35d 100644
--- a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp
+++ b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp
@@ -27,6 +27,7 @@
#include <iostream>
#include <cmath>
#include <functional>
+#include <limits>
namespace
{
@@ -352,15 +353,15 @@ private:
private:
// Check if
// 1. node is const
- // 2. node was not quantized
+ // 2. node's dtype is float32
bool is_quantizable(loco::Node *node)
{
auto const_node = dynamic_cast<luci::CircleConst *>(node);
if (not const_node)
return false;
- // Skip if this is already quantized
- if (is_quantized(const_node))
+ // Skip if this is not float32
+ if (const_node->dtype() != loco::DataType::FLOAT32)
return false;
return true;
diff --git a/compiler/luci/pass/src/QuantizeWeights.cpp b/compiler/luci/pass/src/QuantizeWeights.cpp
index 11322ab44..500ae12ed 100644
--- a/compiler/luci/pass/src/QuantizeWeights.cpp
+++ b/compiler/luci/pass/src/QuantizeWeights.cpp
@@ -23,6 +23,7 @@
#include <cmath>
#include <vector>
#include <functional>
+#include <limits>
using namespace luci;
diff --git a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
index d9a9d4db7..005144516 100644
--- a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
+++ b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
@@ -41,10 +41,28 @@ namespace
{
using namespace luci;
+
+bool use_predefined_values(ActivationQType qtype)
+{
+ switch (qtype)
+ {
+ case ActivationQType::PreDefinedLogistic:
+ case ActivationQType::PreDefinedTanh:
+ case ActivationQType::PreDefinedSoftmax:
+ return true;
+ default:
+ // This ensures this switch-statement handles all ActivationQTypes
+ assert(qtype == ActivationQType::IntScale or qtype == ActivationQType::MinMax);
+ break;
+ }
+
+ return false;
+}
+
// Create a Quantize Op whose
// dtype is out_type
// shape is the same with node
-// qparam is computed using node's min/max
+// qparam is computed according to node's qtype
luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType out_type)
{
auto quantize = node->graph()->nodes()->create<CircleQuantize>();
@@ -60,9 +78,9 @@ luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType
assert(qparam); // FIX_CALLER_UNLESS
auto qtype = luci::activation_qtype(node);
- if (qtype == ActivationQType::PreDefinedValue)
+ if (use_predefined_values(qtype))
{
- quantize->quantparam(luci::make_predefined_qparam(node->opcode(), out_type));
+ quantize->quantparam(luci::make_predefined_qparam(qtype, out_type));
return quantize;
}
@@ -105,6 +123,23 @@ luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType
return quantize;
}
+// Create Dequantize Op whose shape is the same with node
+luci::CircleDequantize *create_dequantize(luci::CircleNode *node)
+{
+ auto dequantize = node->graph()->nodes()->create<luci::CircleDequantize>();
+ dequantize->name(node->name() + "_Dequantize");
+ dequantize->dtype(loco::DataType::FLOAT32);
+ dequantize->rank(node->rank());
+ for (uint32_t i = 0; i < node->rank(); i++)
+ dequantize->dim(i).set(node->dim(i).value());
+
+ dequantize->shape_status(luci::ShapeStatus::VALID);
+
+ luci::add_origin(dequantize, luci::get_origin(node));
+
+ return dequantize;
+}
+
} // namespace
namespace luci
@@ -229,11 +264,13 @@ private:
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleFullyConnected, input)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleGather, params)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleInstanceNorm, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLeakyRelu, features)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLocalResponseNormalization, input)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLogistic, x)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMaxPool2D, value)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMean, input)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMirrorPad, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleNeg, x)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePad, input)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePadV2, input)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePRelu, input)
@@ -241,6 +278,7 @@ private:
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceMax, input)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceMin, input)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleRelu, features)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleRelu6, features)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReshape, tensor)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleResizeBilinear, input)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleResizeNearestNeighbor, input)
@@ -250,6 +288,7 @@ private:
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSoftmax, logits)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSpaceToBatchND, input)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSpaceToDepth, input)
+ INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSqueeze, input)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSqrt, x)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleStridedSlice, input)
INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSum, input)
@@ -353,7 +392,9 @@ void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const
luci::add_origin(quant_op, luci::get_origin(succ));
}
- // Requantize input
+ // Update qparam of input
+ // This step is skipped if input_type is float32
+ if (_ctx->input_type != loco::DataType::FLOAT32)
{
auto quantparam = input->quantparam();
assert(quantparam);
@@ -376,11 +417,13 @@ void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const
assert(_ctx->input_type == loco::DataType::S16);
compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
}
- input->dtype(_ctx->input_type);
input->quantparam()->scale[0] = scaling_factor;
input->quantparam()->zerop[0] = zp;
}
+ // Update dtype of input
+ input->dtype(_ctx->input_type);
+
auto graph_input = inputs->at(input->index());
graph_input->dtype(_ctx->input_type);
}
@@ -405,13 +448,26 @@ void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const
if (not from->quantparam())
continue;
- // Insert Quantize Op
- auto quant_op = create_quantize_op(from, _ctx->output_type);
- loco::replace(from).with(quant_op);
- quant_op->input(from);
+ // Insert Dequantize Op for float32 output_type
+ if (_ctx->output_type == loco::DataType::FLOAT32)
+ {
+ auto dequant_op = create_dequantize(from);
+ loco::replace(from).with(dequant_op);
+ dequant_op->input(from);
+ }
+ else
+ {
+ // Insert Quantize Op for non-float32 output_type
+ auto quant_op = create_quantize_op(from, _ctx->output_type);
+ loco::replace(from).with(quant_op);
+ quant_op->input(from);
- // TODO Set a proper origin (Quantize should have its own Origin)
- luci::add_origin(quant_op, luci::get_origin(from));
+ // TODO Set a proper origin (Quantize should have its own Origin)
+ luci::add_origin(quant_op, luci::get_origin(from));
+ }
+
+ // Update dtype of output
+ output->dtype(_ctx->output_type);
auto graph_output = outputs->at(output->index());
graph_output->dtype(_ctx->output_type);
@@ -594,12 +650,25 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g)
// Set output type
set_output_type(g);
+ // Remove redundant Quantize Op
+ {
+ logo::Phase phase;
+
+ phase.emplace_back(std::make_unique<luci::RemoveRedundantQuantizePass>());
+
+ ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
+ logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
+ phase_runner.attach(&prog);
+ phase_runner.run(phase);
+ }
+
// Remove min/max values
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
if (auto qparam = circle_node->quantparam())
{
+ warn_accuracy_with_range(circle_node);
qparam->min.clear();
qparam->max.clear();
}
diff --git a/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp b/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp
index cebafd32b..21b4fe1c6 100644
--- a/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp
+++ b/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp
@@ -1088,6 +1088,31 @@ private:
luci::CircleConst *_const = nullptr;
};
+class ReduceMaxTestGraph final : public SimpleTestGraph
+{
+public:
+ void init(void) override
+ {
+ TestIOGraph::init({4, 3, 2}, {2});
+
+ _axis = create_const<Type::S32, int32_t>(g(), {4}, {1, 0, -3, -3});
+ _reduce_max = g()->nodes()->create<luci::CircleReduceMax>();
+ {
+ _reduce_max->input(input());
+ _reduce_max->reduction_indices(_axis);
+ _reduce_max->name("test");
+ _reduce_max->keep_dims(false);
+ }
+ output()->from(_reduce_max);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleReduceMax *_reduce_max = nullptr;
+ luci::CircleConst *_axis = nullptr;
+};
+
class ResizeBilinearTestGraph final : public SimpleTestGraph
{
public:
@@ -2345,6 +2370,34 @@ TEST(QuantizedModelVerifierTest, Pow_wrong_granularity_NEG)
SUCCEED();
}
+TEST(QuantizedModelVerifierTest, ReduceMax)
+{
+ TEST_WITH_GRAPH(ReduceMaxTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(ReduceMaxTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(ReduceMaxTestGraph, Type::S16, Granularity::ChannelWise);
+
+ TEST_WITH_LAYER_INFO(ReduceMaxTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_LAYER_INFO(ReduceMaxTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_LAYER_INFO(ReduceMaxTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ReduceMax_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(ReduceMaxTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ReduceMaxTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(ReduceMaxTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, ReduceMax_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(ReduceMaxTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(ReduceMaxTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(ReduceMaxTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
TEST(QuantizedModelVerifierTest, ResizeBilinear)
{
TEST_WITH_GRAPH(ResizeBilinearTestGraph, Type::U8, Granularity::LayerWise);
diff --git a/compiler/luci/pass/src/RemoveRedundantDequantizePass.cpp b/compiler/luci/pass/src/RemoveRedundantDequantizePass.cpp
new file mode 100644
index 000000000..66cd9d791
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveRedundantDequantizePass.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveRedundantDequantizePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+bool remove_redundant_dequant(luci::CircleDequantize *dequant)
+{
+ assert(dequant != nullptr);
+
+ auto prev = loco::must_cast<luci::CircleNode *>(dequant->input());
+ if (prev->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ replace(dequant).with(prev);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+/**
+ * Dequantize Op does the below things on the ifm.
+ * 1. Element-wise update of quantized values (u8/s16) to fp32 values
+ * 2. Update dtype to fp32
+ * If the previous node is not quantized, dequantize Op is redundant.
+ *
+ * BEFORE
+ *
+ * [CircleNode (A)]
+ * |
+ * [CircleNode (B)] (fp32)
+ * |
+ * [CircleDequantize]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode (A)]
+ * |
+ * [CircleNode (B)] (fp32)
+ * |
+ * [CircleNode]
+ */
+bool RemoveRedundantDequantizePass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto target_node = dynamic_cast<luci::CircleDequantize *>(node);
+ if (target_node != nullptr)
+ {
+ if (remove_redundant_dequant(target_node))
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/RemoveRedundantDequantizePass.test.cpp b/compiler/luci/pass/src/RemoveRedundantDequantizePass.test.cpp
new file mode 100644
index 000000000..adb2f14a4
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveRedundantDequantizePass.test.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveRedundantDequantizePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class DequantizeGraphlet
+{
+public:
+ DequantizeGraphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ _dequantize = g->nodes()->create<luci::CircleDequantize>();
+ _dequantize->dtype(loco::DataType::FLOAT32);
+ _dequantize->name("dequantize");
+ }
+
+protected:
+ luci::CircleDequantize *_dequantize = nullptr;
+};
+
+class RedundantDequantizeGraph : public TestIOGraph, public DequantizeGraphlet
+{
+public:
+ RedundantDequantizeGraph() = default;
+
+public:
+ void init(void)
+ {
+ TestIOGraph::init({1}, {1});
+ DequantizeGraphlet::init(g());
+
+ _dequantize->input(input());
+
+ output()->from(_dequantize);
+ }
+
+ void init_u8_input(void)
+ {
+ TestIOGraph::init({1}, {1});
+ DequantizeGraphlet::init(g());
+
+ // Use u8 input (dequantize is not redundant anymore)
+ input()->dtype(loco::DataType::U8);
+ {
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ qparam->scale = {1};
+ qparam->zerop = {1};
+ input()->quantparam(std::move(qparam));
+ }
+
+ _dequantize->input(input());
+
+ output()->from(_dequantize);
+ }
+};
+
+} // namespace
+
+TEST(RemoveRedundantDequantizePass, single_redundant_dequantize)
+{
+ RedundantDequantizeGraph g;
+ luci::RemoveRedundantDequantizePass pass;
+
+ g.init();
+
+ EXPECT_TRUE(pass.run(g.g()));
+
+ int count = 0;
+ for (auto node : loco::active_nodes(loco::output_nodes(g.g())))
+ {
+ if (dynamic_cast<luci::CircleDequantize *>(node))
+ {
+ count++;
+ }
+ }
+
+ ASSERT_EQ(0, count);
+}
+
+TEST(RemoveRedundantDequantizePass, wrong_dtype_NEG)
+{
+ RedundantDequantizeGraph g;
+ luci::RemoveRedundantDequantizePass pass;
+
+ g.init_u8_input();
+
+ EXPECT_FALSE(pass.run(g.g()));
+}
diff --git a/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.cpp b/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.cpp
new file mode 100644
index 000000000..476ec68bf
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.cpp
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveUnnecessaryReshapeNetPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+bool acceptable_intermediate_op(const loco::Node *node)
+{
+ if (not node)
+ return false;
+
+ const auto opcode = loco::must_cast<const luci::CircleNode *>(node)->opcode();
+
+ switch (opcode)
+ {
+ case luci::CircleOpcode::ADD:
+ case luci::CircleOpcode::MUL:
+ case luci::CircleOpcode::TANH:
+ case luci::CircleOpcode::LOGISTIC:
+ break;
+
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+bool same_shape(const loco::Node *a, const loco::Node *b)
+{
+ auto a_cnode = loco::must_cast<const luci::CircleNode *>(a);
+ auto b_cnode = loco::must_cast<const luci::CircleNode *>(b);
+
+ if (a_cnode->rank() != b_cnode->rank())
+ return false;
+
+ for (uint32_t i = 0; i < a_cnode->rank(); i++)
+ {
+ if (not(a_cnode->dim(i) == b_cnode->dim(i)))
+ return false;
+ }
+ return true;
+}
+
+class PreReshapeFinder
+{
+public:
+ PreReshapeFinder(const luci::CircleReshape *post_reshape) : _post_reshape(post_reshape)
+ {
+ assert(post_reshape != nullptr); // FIX_CALLER_UNLESS
+ }
+
+public:
+ // Return true if pre_reshapes are found
+ bool collect_pre_reshapes(loco::Node *node)
+ {
+ // TODO Support diamond case
+ if (loco::succs(node).size() != 1)
+ return false;
+
+ if (auto pre_reshape = dynamic_cast<luci::CircleReshape *>(node))
+ {
+ // Check ifm of pre-reshape and ofm of post_reshape
+ if (not same_shape(pre_reshape->tensor(), _post_reshape))
+ return false;
+
+ // Check ofm of pre-reshape and ifm of post_reshape
+ if (not same_shape(pre_reshape, _post_reshape->tensor()))
+ return false;
+
+ _pre_reshapes.emplace_back(pre_reshape);
+ return true;
+ }
+
+ if (not acceptable_intermediate_op(node))
+ return false;
+
+ for (uint32_t i = 0; i < node->arity(); i++)
+ {
+ if (not collect_pre_reshapes(node->arg(i)))
+ return false;
+ }
+
+ return true;
+ }
+
+public:
+ std::vector<luci::CircleReshape *> pre_reshapes(void) const { return _pre_reshapes; }
+
+private:
+ const luci::CircleReshape *_post_reshape = nullptr;
+ std::vector<luci::CircleReshape *> _pre_reshapes;
+};
+
+bool remove_unnecessary_reshape_net(luci::CircleReshape *reshape)
+{
+ PreReshapeFinder finder(reshape);
+ if (not finder.collect_pre_reshapes(reshape->tensor()))
+ return false;
+
+ // Remove pre_reshapes
+ for (auto pre_reshape : finder.pre_reshapes())
+ {
+ loco::replace(pre_reshape).with(pre_reshape->tensor());
+ }
+
+ // Remove post_reshape
+ loco::replace(reshape).with(reshape->tensor());
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * BEFORE
+ *
+ * [CircleNode]
+ * |
+ * [CircleReshape_1] (shape: A -> B)
+ * |
+ * [CircleNode] (ex: Add/Mul/Tanh/Logistic ..)
+ * |
+ * [CircleReshape_2] (shape: B -> A)
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode]
+ * | \
+ * | [CircleReshape_1]
+ * [CircleNode]
+ * | \
+ * | [CircleReshape_2]
+ * [CircleNode]
+ **/
+bool RemoveUnnecessaryReshapeNetPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto reshape_node = dynamic_cast<luci::CircleReshape *>(node))
+ {
+ if (remove_unnecessary_reshape_net(reshape_node))
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.test.cpp b/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.test.cpp
new file mode 100644
index 000000000..4ad707ba3
--- /dev/null
+++ b/compiler/luci/pass/src/RemoveUnnecessaryReshapeNetPass.test.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci/Pass/RemoveUnnecessaryReshapeNetPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class RemoveUnnecessaryReshapeNet : public ::testing::Test
+{
+public:
+ RemoveUnnecessaryReshapeNet() {}
+
+ void createReshapeConst(luci::CircleReshape *target, const std::vector<uint32_t> shape)
+ {
+ auto shape_const = g.nodes()->create<luci::CircleConst>();
+ shape_const->dtype(loco::DataType::S32);
+ shape_const->size<loco::DataType::S32>(shape.size());
+ shape_const->shape_status(luci::ShapeStatus::VALID);
+ shape_const->rank(1);
+ shape_const->dim(0).set(shape.size());
+ for (int32_t i = 0; i < shape.size(); i++)
+ {
+ shape_const->at<loco::DataType::S32>(i) = static_cast<int32_t>(shape.at(i));
+ }
+ shape_const->name("shape_const");
+ target->shape(shape_const);
+ target->rank(shape.size());
+ for (uint32_t i = 0; i < shape.size(); i++)
+ {
+ target->dim(i) = shape[i];
+ }
+ target->shape_status(luci::ShapeStatus::VALID);
+ }
+
+ void buildGraph(const std::initializer_list<uint32_t> base_shape,
+ const std::initializer_list<uint32_t> first_shape,
+ const std::initializer_list<uint32_t> second_shape)
+ {
+ // Input Create.
+ input = g.nodes()->create<luci::CircleInput>();
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ input->shape_status(luci::ShapeStatus::VALID);
+ input->shape(base_shape);
+ input->name("input");
+
+ // Create first reshape.
+ first_reshape = g.nodes()->create<luci::CircleReshape>();
+ first_reshape->tensor(input);
+ first_reshape->name("Reshape");
+ createReshapeConst(first_reshape, first_shape);
+
+ // Create logistic.
+ logistic = g.nodes()->create<luci::CircleLogistic>();
+ logistic->x(first_reshape);
+ logistic->name("logistic");
+ logistic->shape(first_shape);
+ logistic->shape_status(luci::ShapeStatus::VALID);
+
+ // Create second reshape.
+ second_reshape = g.nodes()->create<luci::CircleReshape>();
+ second_reshape->tensor(logistic);
+ second_reshape->name("second_reshape");
+ createReshapeConst(second_reshape, second_shape);
+
+ // Output Connect.
+ output = g.nodes()->create<luci::CircleOutput>();
+ output->from(second_reshape);
+ output->name("output");
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+ }
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleReshape *first_reshape = nullptr;
+ luci::CircleLogistic *logistic = nullptr;
+ luci::CircleReshape *second_reshape = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+} // namespace
+
+TEST_F(RemoveUnnecessaryReshapeNet, simple_case)
+{
+ buildGraph({1, 1, 1, 32}, {1, 1, 32, 1}, {1, 1, 1, 32});
+ luci::RemoveUnnecessaryReshapeNetPass pass;
+
+ ASSERT_TRUE(pass.run(&g));
+
+ int count = 0;
+ for (auto node : loco::active_nodes(loco::output_nodes(&g)))
+ {
+ if (auto reshape = dynamic_cast<luci::CircleReshape *>(node))
+ count++;
+ }
+ ASSERT_EQ(0, count);
+}
+
+TEST_F(RemoveUnnecessaryReshapeNet, shape_mismatch_NEG)
+{
+ buildGraph({1, 1, 1, 32}, {1, 1, 32, 1}, {1, 1, 2, 16});
+ luci::RemoveUnnecessaryReshapeNetPass pass;
+ ASSERT_FALSE(pass.run(&g));
+}
diff --git a/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.cpp b/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.cpp
new file mode 100644
index 000000000..741b70956
--- /dev/null
+++ b/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.cpp
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h>
+
+namespace
+{
+
+// TODO move to global helper list if needed
+/**
+ * @brief Create a node with `inp` as input from fused activation function `act`
+ */
+luci::CircleNode *fromActivation(luci::CircleNode *inp, luci::FusedActFunc act)
+{
+ switch (act)
+ {
+ case luci::FusedActFunc::NONE:
+ return inp;
+ case luci::FusedActFunc::RELU:
+ {
+ auto n = inp->graph()->nodes()->create<luci::CircleRelu>();
+ n->features(inp);
+ return n;
+ }
+ case luci::FusedActFunc::RELU6:
+ {
+ auto n = inp->graph()->nodes()->create<luci::CircleRelu6>();
+ n->features(inp);
+ return n;
+ }
+ case luci::FusedActFunc::RELU_N1_TO_1:
+ {
+ auto n = inp->graph()->nodes()->create<luci::CircleReluN1To1>();
+ n->features(inp);
+ return n;
+ }
+ case luci::FusedActFunc::TANH:
+ {
+ auto n = inp->graph()->nodes()->create<luci::CircleTanh>();
+ n->x(inp);
+ return n;
+ }
+ case luci::FusedActFunc::SIGN_BIT:
+ {
+ throw std::invalid_argument("no matching node to create from fused activation");
+ }
+ default:
+ throw std::invalid_argument("invalid fused activation");
+ }
+}
+
+/**
+ * Replace Fully Connected with Batched MatMul
+ *
+ * BEFORE
+ *
+ * [Node1] [Node2]
+ * | |
+ * [transpose]? [transpose]?
+ * \ /
+ * [FullyConnected]
+ *
+ * AFTER
+ *
+ * [Node1] [Node2]
+ * \ /
+ * [BatchMatMul] [BiasValue]?
+ * \ /
+ * [Add]?
+ * |
+ * [Activation]?
+ *
+ * Nodes with "?" denote optional elements
+ */
+bool replace_fc_with_matmul(luci::CircleFullyConnected *fc)
+{
+ luci::CircleNode *x = nullptr;
+ luci::CircleNode *y = nullptr;
+ luci::CircleNode *b = nullptr;
+ luci::CircleTranspose *ty = nullptr;
+ luci::CircleTranspose *tx = nullptr;
+ bool adj_x = false;
+ bool adj_y = true;
+
+ if (dynamic_cast<luci::CircleConst *>(fc->weights()))
+ return false; // NonConst
+
+ if ((ty = dynamic_cast<luci::CircleTranspose *>(fc->weights()))) // is y a transpose?
+ {
+ adj_y = false;
+ if (dynamic_cast<luci::CircleConst *>(ty->a()))
+ return false;
+ else
+ y = loco::must_cast<luci::CircleNode *>(ty->a());
+ }
+ else
+ { // y is not transpose and not const
+ y = loco::must_cast<luci::CircleNode *>(fc->weights());
+ }
+ if ((tx = dynamic_cast<luci::CircleTranspose *>(fc->input())))
+ {
+ adj_x = true;
+ x = loco::must_cast<luci::CircleNode *>(tx->a());
+ }
+ else
+ {
+ x = loco::must_cast<luci::CircleNode *>(fc->input());
+ }
+
+ b = loco::must_cast<luci::CircleNode *>(fc->bias());
+
+ if (x->dtype() != loco::DataType::FLOAT32 || y->dtype() != loco::DataType::FLOAT32 ||
+ b->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ auto name = fc->name();
+ assert(name.length() > 0);
+
+ auto matmul = fc->graph()->nodes()->create<luci::CircleBatchMatMul>();
+ matmul->x(x);
+ matmul->y(y);
+ matmul->adj_x(adj_x);
+ matmul->adj_y(adj_y);
+ matmul->name(name);
+ matmul->dtype(fc->dtype());
+
+ luci::add_origin(matmul, luci::get_origin(fc));
+
+ auto all_zero = [](const luci::CircleConst *c) {
+ bool ac = true;
+ for (uint32_t i = 0; i < c->size<loco::DataType::FLOAT32>() && ac; i++)
+ {
+ ac &= c->at<loco::DataType::FLOAT32>(i) == 0.0f;
+ }
+ return ac;
+ };
+
+ auto bc = dynamic_cast<luci::CircleConst *>(b);
+ if ((nullptr != bc) && !all_zero(bc))
+ {
+ auto bias_add = fc->graph()->nodes()->create<luci::CircleAdd>();
+ bias_add->x(matmul);
+ bias_add->y(b);
+ bias_add->name(fc->name() + "/bias_add");
+ bias_add->dtype(fc->dtype());
+ add_origin(bias_add, get_origin(fc));
+ bias_add->fusedActivationFunction(fc->fusedActivationFunction());
+ loco::replace(fc).with(bias_add);
+ }
+ else
+ {
+ auto n = fromActivation(matmul, fc->fusedActivationFunction());
+ add_origin(n, luci::get_origin(fc));
+ n->name(fc->name() + "fusedActivation");
+ n->dtype(fc->dtype());
+ loco::replace(fc).with(n);
+ }
+
+ return true;
+}
+} // namespace
+
+namespace luci
+{
+
+bool ReplaceNonConstFCWithBatchMatMulPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto fc = dynamic_cast<luci::CircleFullyConnected *>(node))
+ {
+ if (replace_fc_with_matmul(fc))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.test.cpp b/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.test.cpp
new file mode 100644
index 000000000..7606a6125
--- /dev/null
+++ b/compiler/luci/pass/src/ReplaceNonConstFCWithBatchMatMulPass.test.cpp
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ReplaceNonConstFCWithBatchMatMulPass.h"
+
+#include <luci/test/TestIOGraph.h>
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+// TODO Reduce duplicate codes in ResolveCustomOpMatMulPass.cpp
+template <typename T>
+luci::CircleConst *create_const_node(loco::Graph *g, const loco::DataType dtype,
+ const std::vector<uint32_t> &shape,
+ const std::vector<T> &values)
+{
+ auto node = g->nodes()->create<luci::CircleConst>();
+ node->dtype(dtype);
+ node->rank(shape.size());
+
+ uint32_t size = 1;
+ for (uint32_t i = 0; i < shape.size(); ++i)
+ {
+ node->dim(i) = shape.at(i);
+ size *= shape.at(i);
+ }
+ node->shape_status(luci::ShapeStatus::VALID);
+
+#define INIT_VALUES(DT) \
+ { \
+ node->size<DT>(size); \
+ for (uint32_t i = 0; i < values.size(); ++i) \
+ node->at<DT>(i) = values[i]; \
+ }
+
+ switch (dtype)
+ {
+ case loco::DataType::U8:
+ INIT_VALUES(loco::DataType::U8);
+ break;
+ case loco::DataType::S16:
+ INIT_VALUES(loco::DataType::S16);
+ break;
+ case loco::DataType::S32:
+ INIT_VALUES(loco::DataType::S32);
+ break;
+ case loco::DataType::FLOAT32:
+ INIT_VALUES(loco::DataType::FLOAT32)
+ break;
+ default:
+ INTERNAL_EXN("create_const_node called with unsupported type");
+ break;
+ }
+ return node;
+}
+
+/**
+ * Simple graph for test
+ *
+ * BEFORE
+ *
+ * [IFM1] [IFM2] [BIAS]
+ * \ | /
+ * [FC]
+ * |
+ * [Res]
+ *
+ * AFTER
+ * [IFM1] [IFM2]
+ * \ |
+ * [BatchMatMul] [BIAS]
+ * \ /
+ * [Add]
+ * |
+ * [Res]
+ *
+ */
+struct FCGraphlet
+{
+public:
+ FCGraphlet() = default;
+ virtual ~FCGraphlet() = default;
+
+ void init(loco::Graph *g, const ShapeU32 r_shape, const float bv)
+ {
+ _tr_y = g->nodes()->create<luci::CircleTranspose>();
+ _tr_y->a(_y);
+ std::vector<int32_t> tr_val = {1, 0};
+ _tr_y->perm(create_const_node(g, loco::DataType::S32, {2}, tr_val));
+
+ _fc = g->nodes()->create<luci::CircleFullyConnected>();
+ _fc->input(_x);
+ _fc->weights(_tr_y);
+ _fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _fc->dtype(loco::DataType::FLOAT32);
+ _fc->shape(r_shape);
+ auto l = _fc->dim(_fc->rank() - 1).value();
+ std::vector<float> bias_val(l, bv);
+ _fc->bias(create_const_node(g, loco::DataType::FLOAT32, {l}, bias_val));
+ _fc->name("fc");
+ }
+
+public:
+ luci::CircleFullyConnected *fc() { return _fc; }
+
+protected:
+ luci::CircleFullyConnected *_fc = nullptr;
+ luci::CircleTranspose *_tr_y = nullptr;
+ luci::CircleInput *_x = nullptr;
+ luci::CircleInput *_y = nullptr;
+};
+
+struct FCGraph : public TestIsGraphlet<2>, public TestOGraphlet, public FCGraphlet
+{
+ FCGraph() = default;
+ virtual ~FCGraph() = default;
+ void init(const ShapeU32 x_shape, const ShapeU32 y_shape, const ShapeU32 r_shape, const float bv)
+ {
+ TestIsGraphlet<2>::init(g(), {x_shape, y_shape});
+ TestOGraphlet::init(g(), r_shape);
+ _x = input(0);
+ _y = input(1);
+ FCGraphlet::init(g(), r_shape, bv);
+ output()->from(_fc);
+ }
+};
+
+class ReplaceNonConstFCWithBatchMatMulPassTest : public ::testing::Test
+{
+public:
+ FCGraph g;
+ luci::ReplaceNonConstFCWithBatchMatMulPass pass;
+};
+
+} // namespace
+
+TEST_F(ReplaceNonConstFCWithBatchMatMulPassTest, simple_test)
+{
+ g.init({2, 3}, {2, 3}, {2, 2}, 0.0f);
+
+ auto ret = pass.run(g.g());
+ EXPECT_EQ(true, ret);
+
+ auto mm = dynamic_cast<luci::CircleBatchMatMul *>(g.output()->from());
+ EXPECT_NE(nullptr, mm);
+}
+
+TEST_F(ReplaceNonConstFCWithBatchMatMulPassTest, nonzero_bias_test)
+{
+ g.init({2, 3}, {2, 3}, {2, 2}, 1.0f);
+
+ auto ret = pass.run(g.g());
+ EXPECT_EQ(true, ret);
+
+ auto mm = dynamic_cast<luci::CircleAdd *>(g.output()->from());
+ EXPECT_NE(nullptr, mm);
+}
+
+TEST_F(ReplaceNonConstFCWithBatchMatMulPassTest, wrong_op_NEG)
+{
+ loco::Graph g;
+
+ auto inp = g.nodes()->create<luci::CircleInput>();
+ auto relu = g.nodes()->create<luci::CircleRelu>();
+ relu->features(inp);
+
+ luci::ReplaceNonConstFCWithBatchMatMulPass pass;
+ auto changed = pass.run(&g);
+
+ EXPECT_EQ(false, changed);
+}
diff --git a/compiler/luci/pass/src/ResolveCustomOpSplitVPass.cpp b/compiler/luci/pass/src/ResolveCustomOpSplitVPass.cpp
new file mode 100644
index 000000000..a65065800
--- /dev/null
+++ b/compiler/luci/pass/src/ResolveCustomOpSplitVPass.cpp
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ResolveCustomOpSplitVPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Service/Nodes/CircleConst.h>
+
+namespace
+{
+
+// Input node is const S64
+// Return s32 version of node
+// Return nullptr if s64 value is out of range of s32
+luci::CircleConst *s64_to_s32(luci::CircleConst *node)
+{
+ assert(node);
+ assert(node->dtype() == loco::DataType::S64);
+
+ auto cloned = luci::clone(node);
+ luci::add_origin(cloned, luci::get_origin(node));
+
+ const auto num_elems = node->size<loco::DataType::S64>();
+
+ cloned->dtype(loco::DataType::S32);
+ cloned->size<loco::DataType::S32>(num_elems);
+
+ for (uint32_t i = 0; i < num_elems; i++)
+ {
+ int64_t val = node->at<loco::DataType::S64>(i);
+ if (val < std::numeric_limits<int32_t>::min() or val > std::numeric_limits<int32_t>::max())
+ return nullptr;
+
+ cloned->at<loco::DataType::S32>(i) = static_cast<int32_t>(val);
+ }
+
+ return cloned;
+}
+
+/** BEFORE
+ *
+ * [CircleNode]
+ * \
+ * \ [size_splits] [split_dim]
+ * \ | /
+ *         [CircleCustom(SplitV)]
+ * |
+ * [CircleCustomOut]
+ * |
+ * [CircleNode]
+ *
+ * AFTER
+ *
+ * [CircleNode]
+ * | \
+ * | \ [size_splits] [split_dim]
+ * | \ | /
+ * | \ | /
+ * | \ | /
+ * [CircleCustom(SplitV)] [CircleSplitV]
+ * | |
+ * [CircleCustomOut] [CircleSplitVOut]
+ * |
+ * [CircleNode]
+ */
+bool resolve_splitv(luci::CircleCustom *node)
+{
+ const std::string custom_code = node->custom_code();
+ const std::vector<uint8_t> custom_options = node->custom_options();
+
+ if (custom_code != "SplitV")
+ return false;
+
+ if (node->numInputs() != 3)
+ return false;
+
+ auto size_splits = dynamic_cast<luci::CircleConst *>(node->inputs(1));
+ if (not size_splits)
+ return false;
+
+  // Convert size_splits to S32, because luci-interpreter does not support
+ // S64 size_splits yet
+ // TODO Support S64 size_splits
+ if (size_splits->dtype() == loco::DataType::S64)
+ {
+ size_splits = s64_to_s32(size_splits);
+ if (not size_splits)
+ return false;
+ }
+ if (size_splits->dtype() != loco::DataType::S32)
+ return false;
+
+ auto split_dim = dynamic_cast<luci::CircleConst *>(node->inputs(2));
+ if (not split_dim)
+ return false;
+
+ if (split_dim->dtype() == loco::DataType::S64)
+ {
+ split_dim = s64_to_s32(split_dim);
+ if (not split_dim)
+ return false;
+ }
+ if (split_dim->dtype() != loco::DataType::S32)
+ return false;
+
+ if (size_splits->rank() != 1)
+ return false;
+
+ const auto num_split = size_splits->dim(0).value();
+
+ auto split_v = node->graph()->nodes()->create<luci::CircleSplitV>();
+ split_v->input(node->inputs(0));
+ split_v->size_splits(size_splits);
+ split_v->split_dim(split_dim);
+ split_v->num_split(num_split);
+ split_v->name(node->name());
+ luci::add_origin(split_v, luci::get_origin(node));
+
+ int32_t i = 0;
+ const auto succs = loco::succs(node);
+ for (auto succ : succs)
+ {
+ auto custom_out = loco::must_cast<luci::CircleCustomOut *>(succ); // FIX_CALLER_UNLESS
+
+ auto split_v_out = node->graph()->nodes()->create<luci::CircleSplitVOut>();
+ split_v_out->input(split_v);
+ split_v_out->name(node->name() + "_out_" + std::to_string(i));
+ split_v_out->index(i++);
+ luci::add_origin(split_v_out, luci::get_origin(node));
+ loco::replace(custom_out).with(split_v_out);
+ }
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool ResolveCustomOpSplitVPass::run(loco::Graph *g)
+{
+ bool changed = false;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto cop = dynamic_cast<luci::CircleCustom *>(node);
+ if (not cop)
+ continue;
+
+ if (resolve_splitv(cop))
+ changed = true;
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ResolveCustomOpSplitVPass.test.cpp b/compiler/luci/pass/src/ResolveCustomOpSplitVPass.test.cpp
new file mode 100644
index 000000000..e7738aadb
--- /dev/null
+++ b/compiler/luci/pass/src/ResolveCustomOpSplitVPass.test.cpp
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ResolveCustomOpSplitVPass.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <luci/IR/CircleNodes.h>
+#include <gtest/gtest.h>
+
+using namespace luci::test;
+
+namespace
+{
+
+/**
+ * graph having Custom operator SplitV
+ *
+ * [Input] [Const] [Const]
+ * \ | /
+ * [Custom(SplitV)]
+ * / | \
+ * [CustomOut] [CustomOut] [CustomOut]
+ * | | |
+ * [Output] [Output] [Output]
+ */
+class SplitVGraphlet
+{
+public:
+ SplitVGraphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ // CircleCustom(SplitV)
+ _splitv = g->nodes()->create<luci::CircleCustom>(3, 3);
+ _splitv->custom_code("SplitV");
+ _splitv->shape({1, 2, 2, 192});
+ _splitv->dtype(loco::DataType::FLOAT32);
+ _splitv->name("splitv");
+
+ // CircleConst
+ auto size_splits = g->nodes()->create<luci::CircleConst>();
+ size_splits->dtype(loco::DataType::S64);
+ size_splits->shape({3});
+ size_splits->size<loco::DataType::S64>(3);
+ size_splits->at<loco::DataType::S64>(0) = 32;
+ size_splits->at<loco::DataType::S64>(1) = 32;
+ size_splits->at<loco::DataType::S64>(2) = 128;
+
+ // CircleConst
+ auto split_dim = g->nodes()->create<luci::CircleConst>();
+ split_dim->dtype(loco::DataType::S32);
+ split_dim->rank(0);
+ split_dim->size<loco::DataType::S32>(1);
+ split_dim->scalar<loco::DataType::S32>() = 3;
+
+ _splitv->inputs(1, size_splits);
+ _splitv->inputs(2, split_dim);
+
+ // CircleCustomOut
+ _splitv_out1 = g->nodes()->create<luci::CircleCustomOut>();
+ _splitv_out1->shape({1, 2, 2, 32});
+ _splitv_out1->dtype(loco::DataType::FLOAT32);
+ _splitv_out1->index(0);
+ _splitv_out1->input(_splitv);
+
+ // CircleCustomOut
+ _splitv_out2 = g->nodes()->create<luci::CircleCustomOut>();
+ _splitv_out2->shape({1, 2, 2, 32});
+ _splitv_out2->dtype(loco::DataType::FLOAT32);
+ _splitv_out2->index(1);
+ _splitv_out2->input(_splitv);
+
+ // CircleCustomOut
+ _splitv_out3 = g->nodes()->create<luci::CircleCustomOut>();
+ _splitv_out3->shape({1, 2, 2, 128});
+ _splitv_out3->dtype(loco::DataType::FLOAT32);
+ _splitv_out3->index(2);
+ _splitv_out3->input(_splitv);
+ }
+
+public:
+ luci::CircleCustom *splitv() { return _splitv; }
+
+protected:
+ luci::CircleCustom *_splitv = nullptr;
+ luci::CircleCustomOut *_splitv_out1 = nullptr;
+ luci::CircleCustomOut *_splitv_out2 = nullptr;
+ luci::CircleCustomOut *_splitv_out3 = nullptr;
+};
+
+class SplitVGraph : public TestIGraphlet, public TestOsGraphlet<3>, public SplitVGraphlet
+{
+public:
+ SplitVGraph() = default;
+
+ void init(void)
+ {
+ TestIGraphlet::init(g(), {1, 2, 2, 192});
+ TestOsGraphlet<3>::init(g(), {{1, 2, 2, 32}, {1, 2, 2, 32}, {1, 2, 2, 128}});
+ SplitVGraphlet::init(g());
+
+ // connect graph
+ _splitv->inputs(0, input());
+
+ output(0)->from(_splitv_out1);
+ output(1)->from(_splitv_out2);
+ output(2)->from(_splitv_out3);
+ }
+};
+
+class SplitVGraphTest : public ::testing::Test
+{
+public:
+ SplitVGraph g;
+ luci::ResolveCustomOpSplitVPass pass;
+};
+
+} // namespace
+
+TEST_F(SplitVGraphTest, simple_test)
+{
+ g.init();
+
+ auto ret = pass.run(g.g());
+ EXPECT_EQ(true, ret);
+
+ auto svo_1 = dynamic_cast<luci::CircleSplitVOut *>(g.output(0)->from());
+ EXPECT_NE(nullptr, svo_1);
+ auto svo_2 = dynamic_cast<luci::CircleSplitVOut *>(g.output(1)->from());
+ EXPECT_NE(nullptr, svo_2);
+ auto svo_3 = dynamic_cast<luci::CircleSplitVOut *>(g.output(2)->from());
+ EXPECT_NE(nullptr, svo_3);
+
+ auto sv = dynamic_cast<luci::CircleSplitV *>(svo_1->input());
+ EXPECT_NE(nullptr, sv);
+ sv = dynamic_cast<luci::CircleSplitV *>(svo_2->input());
+ EXPECT_NE(nullptr, sv);
+ sv = dynamic_cast<luci::CircleSplitV *>(svo_3->input());
+ EXPECT_NE(nullptr, sv);
+
+ auto size_splits = loco::must_cast<luci::CircleConst *>(sv->size_splits());
+ EXPECT_EQ(loco::DataType::S32, size_splits->dtype());
+ EXPECT_EQ(32, size_splits->at<loco::DataType::S32>(0));
+ EXPECT_EQ(32, size_splits->at<loco::DataType::S32>(1));
+ EXPECT_EQ(128, size_splits->at<loco::DataType::S32>(2));
+
+ auto split_dim = loco::must_cast<luci::CircleConst *>(sv->split_dim());
+ EXPECT_EQ(loco::DataType::S32, split_dim->dtype());
+ EXPECT_EQ(3, split_dim->scalar<loco::DataType::S32>());
+}
+
+TEST_F(SplitVGraphTest, wrong_op_NEG)
+{
+ g.init();
+
+ g.splitv()->custom_code("AddV2");
+
+ auto ret = pass.run(g.g());
+ EXPECT_EQ(false, ret);
+}
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h b/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h
index 442183c18..408e6b8d9 100644
--- a/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h
+++ b/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h
@@ -197,6 +197,13 @@ private:
return true;
}
+ bool visit(const luci::CircleReduceMax *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
bool visit(const luci::CircleRelu *node)
{
RETURN_FALSE_UNLESS(is_lwq(node));
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp b/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp
index 4e1c062c0..cf86acabe 100644
--- a/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp
+++ b/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp
@@ -302,6 +302,15 @@ bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CirclePow *nod
}
template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleReduceMax *node)
+{
+ RETURN_FALSE_UNLESS(has_type(node, Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+ RETURN_FALSE_UNLESS(has_type(node->reduction_indices(), loco::DataType::S32))
+ return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleRelu *node)
{
return group_has_type(node, Qtype);
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeType.h b/compiler/luci/pass/src/VerifyQuantizedNodeType.h
index ff1acbd6f..789d3c7cd 100644
--- a/compiler/luci/pass/src/VerifyQuantizedNodeType.h
+++ b/compiler/luci/pass/src/VerifyQuantizedNodeType.h
@@ -104,6 +104,7 @@ private:
bool visit(const luci::CirclePadV2 *node);
bool visit(const luci::CirclePRelu *node);
bool visit(const luci::CirclePow *node);
+ bool visit(const luci::CircleReduceMax *node);
bool visit(const luci::CircleRelu *node);
bool visit(const luci::CircleReshape *node);
bool visit(const luci::CircleResizeBilinear *node);
diff --git a/compiler/luci/pass/src/helpers/SparsityFormatConverter.cpp b/compiler/luci/pass/src/helpers/SparsityFormatConverter.cpp
new file mode 100644
index 000000000..72b7d60ff
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/SparsityFormatConverter.cpp
@@ -0,0 +1,312 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// codes under namespace sparsity referenced from
+// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/
+// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.h
+// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.cc
+
+#include "SparsityFormatConverter.h"
+
+#include <oops/InternalExn.h>
+
+#include <cassert>
+
+namespace sparsity
+{
+
+namespace
+{
+
+uint64_t GetFlattenedIndex(const std::vector<int> &indices, const std::vector<int> &shape)
+{
+ uint64_t index = 0;
+ int sub_elements = 1;
+ for (int i = shape.size() - 1; i >= 0; i--)
+ {
+ index += indices[i] * sub_elements;
+ sub_elements *= shape[i];
+ }
+ return index;
+}
+
+std::vector<int> TfLiteIntArrayToVector(const TfLiteIntArray *int_array)
+{
+ std::vector<int> values;
+ if (!int_array)
+ {
+ return values;
+ }
+
+ values.resize(int_array->size);
+ for (int i = 0; i < int_array->size; i++)
+ {
+ values[i] = int_array->data[i];
+ }
+
+ return values;
+}
+
+} // namespace
+
+template <typename T>
+FormatConverter<T>::FormatConverter(const std::vector<int> &shape, const TfLiteSparsity &sparsity)
+{
+ auto traversal_order = TfLiteIntArrayToVector(sparsity.traversal_order);
+ auto block_map = TfLiteIntArrayToVector(sparsity.block_map);
+
+ std::vector<TfLiteDimensionType> format(sparsity.dim_metadata_size);
+ std::vector<int> dense_size(sparsity.dim_metadata_size);
+ std::vector<std::vector<int>> segments(sparsity.dim_metadata_size);
+ std::vector<std::vector<int>> indices(sparsity.dim_metadata_size);
+ for (int i = 0; i < sparsity.dim_metadata_size; i++)
+ {
+ format[i] = sparsity.dim_metadata[i].format;
+ dense_size[i] = sparsity.dim_metadata[i].dense_size;
+ segments[i] = TfLiteIntArrayToVector(sparsity.dim_metadata[i].array_segments);
+ indices[i] = TfLiteIntArrayToVector(sparsity.dim_metadata[i].array_indices);
+ }
+
+ InitSparseToDenseConverter(shape, std::move(traversal_order), std::move(format),
+ std::move(dense_size), std::move(segments), std::move(indices),
+ std::move(block_map));
+}
+
+template <typename T>
+void FormatConverter<T>::InitSparseToDenseConverter(
+ std::vector<int> shape, std::vector<int> traversal_order, std::vector<TfLiteDimensionType> format,
+ std::vector<int> dense_size, std::vector<std::vector<int>> segments,
+ std::vector<std::vector<int>> indices, std::vector<int> block_map)
+{
+ dense_shape_ = std::move(shape);
+ traversal_order_ = std::move(traversal_order);
+ block_map_ = std::move(block_map);
+ format_ = std::move(format);
+
+ dense_size_ = 1;
+ for (size_t i = 0; i < dense_shape_.size(); i++)
+ {
+ dense_size_ *= dense_shape_[i];
+ }
+
+ dim_metadata_.resize(2 * format_.size());
+ for (size_t i = 0; i < format_.size(); i++)
+ {
+ if (format_[i] == kTfLiteDimDense)
+ {
+ dim_metadata_[2 * i] = {dense_size[i]};
+ }
+ else
+ {
+ dim_metadata_[2 * i] = std::move(segments[i]);
+ dim_metadata_[2 * i + 1] = std::move(indices[i]);
+ }
+ }
+
+ int original_rank = dense_shape_.size();
+ int block_dim = 0;
+
+ blocked_shape_.resize(original_rank);
+ block_size_.resize(block_map_.size());
+ for (int i = 0; i < original_rank; i++)
+ {
+ if (block_dim < (int)block_map_.size() && block_map_[block_dim] == i)
+ {
+ if (original_rank + block_dim < (int)traversal_order_.size())
+ {
+ int orig_dim = traversal_order_[original_rank + block_dim];
+ block_size_[block_dim] = dense_size[orig_dim];
+ blocked_shape_[i] = dense_shape_[i] / dense_size[orig_dim];
+ block_dim++;
+ }
+ }
+ else
+ {
+ blocked_shape_[i] = dense_shape_[i];
+ }
+ }
+}
+
+template <typename T>
+void FormatConverter<T>::Populate(const T *src_data, std::vector<int> indices, int level,
+ int prev_idx, int *src_data_ptr, T *dest_data)
+{
+ if (static_cast<size_t>(level) == indices.size())
+ {
+ int orig_rank = dense_shape_.size();
+ std::vector<int> orig_idx;
+ orig_idx.resize(orig_rank);
+ int i = 0;
+ for (; static_cast<size_t>(i) < orig_idx.size(); i++)
+ {
+ int orig_dim = traversal_order_[i];
+ orig_idx[orig_dim] = indices[i];
+ }
+
+ for (; static_cast<size_t>(i) < indices.size(); i++)
+ {
+ const int block_idx = traversal_order_[i] - orig_rank;
+ const int orig_dim = block_map_[block_idx];
+ orig_idx[orig_dim] = orig_idx[orig_dim] * block_size_[block_idx] + indices[i];
+ }
+
+ dest_data[GetFlattenedIndex(orig_idx, dense_shape_)] = src_data[*src_data_ptr];
+
+ *src_data_ptr = *src_data_ptr + 1;
+ return;
+ }
+
+ const int metadata_idx = 2 * level;
+ const int shape_of_level = dim_metadata_[metadata_idx][0];
+ if (format_[level] == kTfLiteDimDense)
+ {
+ for (int i = 0; i < shape_of_level; i++)
+ {
+ indices[level] = i;
+ Populate(src_data, indices, level + 1, prev_idx * shape_of_level + i, src_data_ptr,
+ dest_data);
+ }
+ }
+ else if (static_cast<size_t>(prev_idx + 1) < dim_metadata_[metadata_idx].size())
+ {
+ const auto &array_segments = dim_metadata_[metadata_idx];
+ const auto &array_indices = dim_metadata_[metadata_idx + 1];
+ for (int i = array_segments[prev_idx]; i < array_segments[prev_idx + 1]; i++)
+ {
+ if (static_cast<size_t>(i) < array_indices.size() &&
+ static_cast<size_t>(level) < indices.size())
+ {
+ indices[level] = array_indices[i];
+ Populate(src_data, indices, level + 1, i, src_data_ptr, dest_data);
+ }
+ }
+ }
+}
+
+template <typename T> bool FormatConverter<T>::SparseToDense(const T *src_data)
+{
+ data_.resize(dense_size_);
+ std::fill(data_.begin(), data_.end(), T(0));
+
+ int total_rank = traversal_order_.size();
+ int src_data_ptr = 0;
+ std::vector<int> indices(total_rank);
+ Populate(src_data, indices, 0, 0, &src_data_ptr, data_.data());
+
+ return true;
+}
+
+template class FormatConverter<float>;
+template class FormatConverter<uint16_t>;
+
+} // namespace sparsity
+
+#include <luci/IR/SparsityParam.h>
+
+namespace luci
+{
+
+sparsity::TfLiteDimensionType to_tflite_sparsity(luci::DimensionType dt)
+{
+ switch (dt)
+ {
+ case luci::DimensionType::DENSE:
+ return sparsity::TfLiteDimensionType::kTfLiteDimDense;
+ case luci::DimensionType::SPARSE_CSR:
+ return sparsity::TfLiteDimensionType::kTfLiteDimSparseCSR;
+ }
+ return sparsity::TfLiteDimensionType::kTfLiteDimDense;
+}
+
+sparsity::TfLiteIntArray *to_tflite_sparsity(const luci::SparseIndexVector &data)
+{
+ auto type = data.type();
+ switch (type)
+ {
+ case luci::SparseIndexVectorType::NONE:
+ {
+ std::vector<int32_t> empty;
+ return makeTfLiteArray(empty);
+ }
+ case luci::SparseIndexVectorType::I32:
+ return makeTfLiteArray<int32_t>(*data.as_int32_vector());
+ case luci::SparseIndexVectorType::U16:
+ return makeTfLiteArray<uint16_t>(*data.as_uint16_vector());
+ case luci::SparseIndexVectorType::U8:
+ return makeTfLiteArray<uint8_t>(*data.as_uint8_vector());
+ default:
+ INTERNAL_EXN_V("unsupported SparseIndexVectorType", oops::to_uint32(type));
+ }
+}
+
+sparsity::TfLiteSparsity to_tflite_sparsity(const luci::SparsityParam *sp)
+{
+ sparsity::TfLiteSparsity tflsp;
+ tflsp.traversal_order = makeTfLiteArray(sp->traversal_order);
+ tflsp.block_map = makeTfLiteArray(sp->block_map);
+ tflsp.dim_metadata = makeTfLiteDimensionMetadata(sp->dim_metadata);
+ tflsp.dim_metadata_size = sp->dim_metadata.size();
+ return tflsp;
+}
+
+template <typename T> sparsity::TfLiteIntArray *makeTfLiteArray(const std::vector<T> &data)
+{
+ size_t cn = data.size();
+ size_t sz = 1 + data.size();
+ sparsity::TfLiteIntArray *sp = (sparsity::TfLiteIntArray *)(new int[sz]);
+ sp->size = cn;
+ for (size_t i = 0; i < cn; ++i)
+ {
+ sp->data[i] = data[i];
+ }
+ return sp;
+}
+
+sparsity::TfLiteDimensionMetadata *
+makeTfLiteDimensionMetadata(const std::vector<luci::DimMetaData> &data)
+{
+ size_t cn = data.size();
+ sparsity::TfLiteDimensionMetadata *tfldm = new sparsity::TfLiteDimensionMetadata[cn];
+
+ for (size_t i = 0; i < cn; ++i)
+ {
+ tfldm[i].format = to_tflite_sparsity(data[i].format());
+ tfldm[i].dense_size = data[i].dense_size();
+ tfldm[i].array_segments = to_tflite_sparsity(data[i].array_segments());
+ tfldm[i].array_indices = to_tflite_sparsity(data[i].array_indices());
+ }
+
+ return tfldm;
+}
+
+void freeTfLiteSparsity(sparsity::TfLiteSparsity &tflsp)
+{
+ assert(tflsp.traversal_order);
+ assert(tflsp.block_map);
+ delete[] tflsp.traversal_order;
+ delete[] tflsp.block_map;
+
+ for (int i = 0; i < tflsp.dim_metadata_size; ++i)
+ {
+ assert(tflsp.dim_metadata[i].array_segments);
+ assert(tflsp.dim_metadata[i].array_indices);
+ delete[] tflsp.dim_metadata[i].array_segments;
+ delete[] tflsp.dim_metadata[i].array_indices;
+ }
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/helpers/SparsityFormatConverter.h b/compiler/luci/pass/src/helpers/SparsityFormatConverter.h
new file mode 100644
index 000000000..fcd9bbcd0
--- /dev/null
+++ b/compiler/luci/pass/src/helpers/SparsityFormatConverter.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_HELPERS_SPARSITY_FORMAT_CONVERTER_H__
+#define __LUCI_PASS_HELPERS_SPARSITY_FORMAT_CONVERTER_H__
+
+#include <cstdint>
+#include <vector>
+
+// codes under namespace sparsity referenced from
+// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/
+// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.h
+// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.cc
+
+namespace sparsity
+{
+
+// Storage format of each dimension in a sparse tensor.
+typedef enum TfLiteDimensionType
+{
+ kTfLiteDimDense = 0,
+ kTfLiteDimSparseCSR,
+} TfLiteDimensionType;
+
+// Fixed size list of integers. Used for dimensions and inputs/outputs tensor
+// indices
+typedef struct TfLiteIntArray
+{
+ int size;
+ int data[];
+} TfLiteIntArray;
+
+// Metadata to encode each dimension in a sparse tensor.
+typedef struct TfLiteDimensionMetadata
+{
+ TfLiteDimensionType format;
+ int dense_size;
+ TfLiteIntArray *array_segments;
+ TfLiteIntArray *array_indices;
+} TfLiteDimensionMetadata;
+
+// Parameters used to encode a sparse tensor. For detailed explanation of each
+// field please refer to lite/schema/schema.fbs.
+typedef struct TfLiteSparsity
+{
+ TfLiteIntArray *traversal_order;
+ TfLiteIntArray *block_map;
+ TfLiteDimensionMetadata *dim_metadata;
+ int dim_metadata_size;
+} TfLiteSparsity;
+
+// A converter that keeps an internal representation of sparse tensor parameters
+// and converts tensors between dense and sparse formats.
+template <typename T> class FormatConverter
+{
+public:
+ /* Creates a sparse to dense converter.
+ * @param shape Shape of the target dense tensor.
+ * @param sparsity Sparsity parameter of the sparse TfLiteTensor.
+ */
+ FormatConverter(const std::vector<int> &shape, const TfLiteSparsity &sparsity);
+
+ const std::vector<T> &GetData() { return data_; }
+ const std::vector<std::vector<int>> &GetDimMetadata() { return dim_metadata_; }
+
+ bool SparseToDense(const T *src_data);
+
+private:
+ // Helper function for initializing this converter for sparse to dense
+ // conversion.
+ void InitSparseToDenseConverter(std::vector<int> shape, std::vector<int> traversal_order,
+ std::vector<TfLiteDimensionType> format,
+ std::vector<int> dense_size,
+ std::vector<std::vector<int>> segments,
+ std::vector<std::vector<int>> indices,
+ std::vector<int> block_map);
+
+ void Populate(const T *src_data, std::vector<int> indices, int level, int prev_idx,
+ int *src_data_ptr, T *dest_data);
+
+private:
+ std::vector<int> dense_shape_;
+ std::vector<int> blocked_shape_;
+ size_t dense_size_;
+ std::vector<int> traversal_order_;
+ std::vector<TfLiteDimensionType> format_;
+ std::vector<int> block_size_;
+ std::vector<int> block_map_;
+ std::vector<std::vector<int>> dim_metadata_;
+ std::vector<T> data_;
+};
+
+extern template class FormatConverter<float>;
+extern template class FormatConverter<uint16_t>;
+
+} // namespace sparsity
+
+#include <luci/IR/SparsityParam.h>
+
+namespace luci
+{
+
+sparsity::TfLiteDimensionType to_tflite_sparsity(luci::DimensionType dt);
+sparsity::TfLiteIntArray *to_tflite_sparsity(const luci::SparseIndexVector &data);
+sparsity::TfLiteSparsity to_tflite_sparsity(const luci::SparsityParam *sp);
+
+template <typename T> sparsity::TfLiteIntArray *makeTfLiteArray(const std::vector<T> &data);
+sparsity::TfLiteDimensionMetadata *
+makeTfLiteDimensionMetadata(const std::vector<luci::DimMetaData> &data);
+
+void freeTfLiteSparsity(sparsity::TfLiteSparsity &tflsp);
+
+} // namespace luci
+
+#endif // __LUCI_PASS_HELPERS_SPARSITY_FORMAT_CONVERTER_H__
diff --git a/compiler/luci/requires.cmake b/compiler/luci/requires.cmake
index e896188be..0a5e6a58b 100644
--- a/compiler/luci/requires.cmake
+++ b/compiler/luci/requires.cmake
@@ -10,4 +10,5 @@ require("oops")
require("hermes")
require("hermes-std")
require("tflchef")
+require("circlechef")
require("tflite2circle")
diff --git a/compiler/luci/service/src/CircleCloneNode.h b/compiler/luci/service/src/CircleCloneNode.h
index 99e4561b3..95f06db4c 100644
--- a/compiler/luci/service/src/CircleCloneNode.h
+++ b/compiler/luci/service/src/CircleCloneNode.h
@@ -72,6 +72,7 @@ public:
CloneNodeLet(loco::Graph *graph) : _graph(graph){};
public:
+ luci::CircleNode *visit(const luci::CircleDensify *) final;
luci::CircleNode *visit(const luci::CircleDepthToSpace *) final;
luci::CircleNode *visit(const luci::CircleDepthwiseConv2D *) final;
luci::CircleNode *visit(const luci::CircleDequantize *) final;
diff --git a/compiler/luci/service/src/CircleShapeInferenceRule.cpp b/compiler/luci/service/src/CircleShapeInferenceRule.cpp
index 9d156f3e2..a368faef4 100644
--- a/compiler/luci/service/src/CircleShapeInferenceRule.cpp
+++ b/compiler/luci/service/src/CircleShapeInferenceRule.cpp
@@ -204,6 +204,7 @@ template <class CIRCLENODE> loco::NodeShape broadcast_xy(const CIRCLENODE *node)
return loco::NodeShape{inputs_shape}; \
}
+DECLARE_USE_SINGLE(input);
DECLARE_USE_SINGLE(inputs);
DECLARE_USE_SINGLE(x);
DECLARE_USE_SINGLE(logits);
@@ -258,10 +259,10 @@ loco::NodeShape infer_add_n(const luci::CircleAddN *node)
return loco::NodeShape{shape};
}
-loco::NodeShape infer_arg_max(const luci::CircleArgMax *node)
+template <class CIRCLENODE> loco::NodeShape infer_arg_maxmin(const CIRCLENODE *node)
{
- auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
- auto dimension_shape = luci::shape_get(node->dimension()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).template as<loco::TensorShape>();
+ auto dimension_shape = luci::shape_get(node->dimension()).template as<loco::TensorShape>();
int64_t select_axis = 0;
{
@@ -271,55 +272,19 @@ loco::NodeShape infer_arg_max(const luci::CircleArgMax *node)
// Support S32 for now.
auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
- "Only support int32 CircleConst for CircleArgMax");
+ "Only support int32 CircleConst for CircleArgMax/CircleArgMin");
if (const_shape_node->rank() > 1)
INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
oops::to_uint32(const_shape_node->rank()));
- select_axis = const_shape_node->scalar<loco::DataType::S32>();
- }
- assert(select_axis < input_shape.rank());
- assert(select_axis >= 0); // TODO support minus of this breaks
-
- // NOTE select_axis is removed
- loco::TensorShape shape_output;
- uint32_t rank = input_shape.rank();
- uint32_t shrink = static_cast<uint32_t>(select_axis);
- assert(rank > 0);
- shape_output.rank(rank - 1);
- for (uint32_t r = 0, d = 0; r < rank; ++r)
- {
- if (r == shrink)
- continue;
- shape_output.dim(d++) = input_shape.dim(r);
+ select_axis = const_shape_node->template scalar<loco::DataType::S32>();
}
- return loco::NodeShape{shape_output};
-}
-
-loco::NodeShape infer_arg_min(const luci::CircleArgMin *node)
-{
- auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
- auto dimension_shape = luci::shape_get(node->dimension()).as<loco::TensorShape>();
-
- int64_t select_axis = 0;
- {
- LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr");
-
- // Only support node's shape() is CircleConst with S32/S64
- // Support S32 for now.
- auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
- LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
- "Only support int32 CircleConst for CircleArgMin");
-
- if (const_shape_node->rank() > 1)
- INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
- oops::to_uint32(const_shape_node->rank()));
- select_axis = const_shape_node->scalar<loco::DataType::S32>();
- }
assert(select_axis < input_shape.rank());
- assert(select_axis >= 0); // TODO support minus of this breaks
+
+ if (select_axis < 0)
+ select_axis += input_shape.rank();
// NOTE select_axis is removed
loco::TensorShape shape_output;
@@ -1180,45 +1145,17 @@ loco::NodeShape infer_reshape(const luci::CircleReshape *node)
return loco::NodeShape{output_shape};
}
-loco::NodeShape infer_resize_bilinear(const luci::CircleResizeBilinear *node)
+template <class CIRCLENODE> loco::NodeShape infer_resize_type(const CIRCLENODE *node)
{
- auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
-
- if (input_shape.rank() != 4)
- INTERNAL_EXN("Expected ResizeBilinear input to have rank 4");
-
- auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
-
- if (const_node->dtype() != loco::DataType::S32)
- INTERNAL_EXN("Only S32 datatype is supported for ResizeBilinear size");
-
- if (const_node->rank() != 1)
- INTERNAL_EXN("Expected size tensor of rank 1");
-
- if (const_node->dim(0).value() != 2)
- INTERNAL_EXN("Expected size tensor with shape [2]");
-
- loco::TensorShape output_shape;
- output_shape.rank(4);
- output_shape.dim(0) = input_shape.dim(0);
- output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
- output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
- output_shape.dim(3) = input_shape.dim(3);
-
- return loco::NodeShape{output_shape};
-}
-
-loco::NodeShape infer_resize_nearest_neighbor(const luci::CircleResizeNearestNeighbor *node)
-{
- auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+ auto input_shape = luci::shape_get(node->input()).template as<loco::TensorShape>();
if (input_shape.rank() != 4)
- INTERNAL_EXN("Expected ResizeNearesNeighbor input to have rank 4");
+ INTERNAL_EXN("Expected input to have rank 4");
auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
if (const_node->dtype() != loco::DataType::S32)
- INTERNAL_EXN("Only S32 datatype is supported for ResizeNearesNeighbor size");
+ INTERNAL_EXN("Only S32 datatype is supported for size");
if (const_node->rank() != 1)
INTERNAL_EXN("Expected size tensor of rank 1");
@@ -1229,8 +1166,8 @@ loco::NodeShape infer_resize_nearest_neighbor(const luci::CircleResizeNearestNei
loco::TensorShape output_shape;
output_shape.rank(4);
output_shape.dim(0) = input_shape.dim(0);
- output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
- output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
+ output_shape.dim(1) = const_node->template at<loco::DataType::S32>(0);
+ output_shape.dim(2) = const_node->template at<loco::DataType::S32>(1);
output_shape.dim(3) = input_shape.dim(3);
return loco::NodeShape{output_shape};
@@ -2080,9 +2017,9 @@ public:
loco::NodeShape visit(const luci::CircleAddN *node) final { return infer_add_n(node); }
- loco::NodeShape visit(const luci::CircleArgMax *node) final { return infer_arg_max(node); }
+ loco::NodeShape visit(const luci::CircleArgMax *node) final { return infer_arg_maxmin(node); }
- loco::NodeShape visit(const luci::CircleArgMin *node) final { return infer_arg_min(node); }
+ loco::NodeShape visit(const luci::CircleArgMin *node) final { return infer_arg_maxmin(node); }
loco::NodeShape visit(const luci::CircleAveragePool2D *node) final
{
@@ -2119,6 +2056,8 @@ public:
loco::NodeShape visit(const luci::CircleCustom *node) final { return use_own(node); }
+ loco::NodeShape visit(const luci::CircleDensify *node) final { return use_input(node); }
+
loco::NodeShape visit(const luci::CircleDepthToSpace *node) final
{
return infer_depth_to_space(node);
@@ -2348,12 +2287,12 @@ public:
loco::NodeShape visit(const luci::CircleResizeBilinear *node) final
{
- return infer_resize_bilinear(node);
+ return infer_resize_type(node);
}
loco::NodeShape visit(const luci::CircleResizeNearestNeighbor *node) final
{
- return infer_resize_nearest_neighbor(node);
+ return infer_resize_type(node);
}
loco::NodeShape visit(const luci::CircleReverseSequence *node) final
diff --git a/compiler/luci/service/src/CircleTypeInferenceRule.cpp b/compiler/luci/service/src/CircleTypeInferenceRule.cpp
index 438c4a364..7616390ae 100644
--- a/compiler/luci/service/src/CircleTypeInferenceRule.cpp
+++ b/compiler/luci/service/src/CircleTypeInferenceRule.cpp
@@ -102,6 +102,11 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
return node->dtype();
}
+ loco::DataType visit(const luci::CircleDensify *node) final
+ {
+ return luci::dtype_get(node->input());
+ }
+
loco::DataType visit(const luci::CircleDepthToSpace *node) final
{
return luci::dtype_get(node->input());
diff --git a/compiler/luci/service/src/Nodes/CircleDensify.cpp b/compiler/luci/service/src/Nodes/CircleDensify.cpp
new file mode 100644
index 000000000..a0d15b6c7
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleDensify.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::DEF>::visit(const luci::CircleDensify *)
+{
+ return _graph->nodes()->create<luci::CircleDensify>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleDensify.test.cpp b/compiler/luci/service/src/Nodes/CircleDensify.test.cpp
new file mode 100644
index 000000000..d0f32c1a2
--- /dev/null
+++ b/compiler/luci/service/src/Nodes/CircleDensify.test.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Densify)
+{
+ auto g = loco::make_graph();
+ auto node_densify = g->nodes()->create<luci::CircleDensify>();
+
+ auto gc = loco::make_graph();
+ auto cloned = luci::clone_node(node_densify, gc.get());
+ ASSERT_NE(nullptr, cloned);
+ ASSERT_EQ(gc.get(), cloned->graph());
+
+ auto cloned_densify = dynamic_cast<luci::CircleDensify *>(cloned);
+ ASSERT_NE(nullptr, cloned_densify);
+}
diff --git a/compiler/luci/service/src/ShapeInfer_StridedSlice.cpp b/compiler/luci/service/src/ShapeInfer_StridedSlice.cpp
index c5864f938..77135cca0 100644
--- a/compiler/luci/service/src/ShapeInfer_StridedSlice.cpp
+++ b/compiler/luci/service/src/ShapeInfer_StridedSlice.cpp
@@ -24,16 +24,22 @@
#include <loco/IR/NodeShape.h>
#include <oops/InternalExn.h>
+#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
+// code referenced from
+// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/
+// tensorflow/lite/kernels/strided_slice.cc
+// tensorflow/lite/kernels/internal/strided_slice_logic.h
+
namespace
{
-// This Op only supports 1-4D cases and since we use the reference 4D
+// This Op only supports 1-5D cases and since we use the reference 4D
// implementation, the 1-3D tensors are mapped to 4D.
-const int kMaxDim = 4;
+const int kMaxDim = 5;
const loco::DataType S32 = loco::DataType::S32;
@@ -42,18 +48,47 @@ using int16 = int16_t;
struct StridedSliceParams
{
- int8 start_indices_count;
+ int8 start_indices_count = 0;
int16 start_indices[kMaxDim];
- int8 stop_indices_count;
+ int8 stop_indices_count = 0;
int16 stop_indices[kMaxDim];
- int8 strides_count;
+ int8 strides_count = 0;
int16 strides[kMaxDim];
- int16 begin_mask;
- int16 ellipsis_mask;
- int16 end_mask;
- int16 new_axis_mask;
- int16 shrink_axis_mask;
+ int16 begin_mask = 0;
+ int16 ellipsis_mask = 0;
+ int16 end_mask = 0;
+ int16 new_axis_mask = 0;
+ int16 shrink_axis_mask = 0;
+};
+
+struct StridedSliceContext
+{
+ StridedSliceContext(const luci::CircleStridedSlice *node)
+ {
+ params.begin_mask = node->begin_mask();
+ params.ellipsis_mask = node->ellipsis_mask();
+ params.end_mask = node->end_mask();
+ params.new_axis_mask = node->new_axis_mask();
+ params.shrink_axis_mask = node->shrink_axis_mask();
+
+ input = loco::must_cast<luci::CircleNode *>(node->input());
+ begin = loco::must_cast<luci::CircleConst *>(node->begin());
+ end = loco::must_cast<luci::CircleConst *>(node->end());
+ strides = loco::must_cast<luci::CircleConst *>(node->strides());
+
+ loco::TensorShape input_shape = luci::shape_get(input).as<loco::TensorShape>();
+ input_dims = input_shape.rank();
+ }
+ StridedSliceParams params;
+ luci::CircleNode *input = nullptr;
+ luci::CircleConst *begin = nullptr;
+ luci::CircleConst *end = nullptr;
+ luci::CircleConst *strides = nullptr;
+
+ // Equivalent input shape after adding axis according to new_axis_mask.
+ loco::TensorShape effective_input_shape;
+ uint32_t input_dims = 0;
};
// Use until std::clamp() is available from C++17.
@@ -70,8 +105,8 @@ inline int Clamp(const int32_t v, const int32_t lo, const int32_t hi)
// Return the index for the first element along that axis. This index will be a
// positive integer between [0, axis_size - 1] that can be used to index
// directly into the data.
-inline int StartForAxis(const StridedSliceParams &params, const loco::TensorShape &input_shape,
- uint32_t axis)
+inline int32_t StartForAxis(const StridedSliceParams &params, const loco::TensorShape &input_shape,
+ uint32_t axis)
{
const auto begin_mask = params.begin_mask;
const auto *start_indices = params.start_indices;
@@ -108,7 +143,16 @@ inline int StartForAxis(const StridedSliceParams &params, const loco::TensorShap
}
// Clamping
- start = Clamp(start, 0, axis_size - 1);
+ if (strides[axis] > 0)
+ {
+ // Forward iteration
+ start = Clamp(start, 0, axis_size);
+ }
+ else
+ {
+ // Backward iteration
+ start = Clamp(start, -1, axis_size - 1);
+ }
return start;
}
@@ -118,14 +162,14 @@ inline int StartForAxis(const StridedSliceParams &params, const loco::TensorShap
// element. ie. So if you were iterating through all elements of a 1D array of
// size 4, this function would return 4 as the stop, because it is one past the
// "real" indices of 0, 1, 2 & 3.
-inline int StopForAxis(const StridedSliceParams &params, const loco::TensorShape &input_shape,
- int axis, int start_for_axis)
+inline int32_t StopForAxis(const StridedSliceParams &params, const loco::TensorShape &input_shape,
+ int32_t axis, int32_t start_for_axis)
{
const auto end_mask = params.end_mask;
const auto shrink_axis_mask = params.shrink_axis_mask;
const auto *stop_indices = params.stop_indices;
const auto *strides = params.strides;
- const int axis_size = static_cast<int32_t>(input_shape.dim(axis).value());
+ const int32_t axis_size = static_cast<int32_t>(input_shape.dim(axis).value());
if (axis_size == 0)
{
return 0;
@@ -141,7 +185,7 @@ inline int StopForAxis(const StridedSliceParams &params, const loco::TensorShape
// already been adjusted for negative indices.
if (shrink_axis)
{
- stop = start_for_axis + 1;
+ return start_for_axis + 1;
}
// end_mask override
@@ -183,37 +227,125 @@ inline int StopForAxis(const StridedSliceParams &params, const loco::TensorShape
return stop;
}
-StridedSliceParams BuildStridedSliceParams(const luci::CircleStridedSlice *node)
+StridedSliceParams BuildStridedSliceParams(StridedSliceContext *op_context)
{
StridedSliceParams op_params;
- if (kMaxDim < node->rank())
+ // The ellipsis_mask and new_axis_mask in op_params are not used. Those masks
+ // are processed here to update begin_mask, end_mask and the index range.
+ op_params.begin_mask = 0;
+ op_params.ellipsis_mask = 0;
+ op_params.end_mask = 0;
+ op_params.new_axis_mask = 0;
+ op_params.shrink_axis_mask = 0;
+
+ // Count indexes where the new_axis_mask is set but the ellipsis_mask is not.
+ loco::TensorShape begin_shape = luci::shape_get(op_context->begin).as<loco::TensorShape>();
+ const uint32_t begin_count = begin_shape.dim(0).value();
+ uint32_t num_add_axis = 0;
+ for (uint32_t i = 0; i < begin_count; ++i)
{
- INTERNAL_EXN_V("Cannot support StridedSlice rank > ", kMaxDim);
+ if (!((1 << i) & op_context->params.ellipsis_mask) &&
+ ((1 << i) & op_context->params.new_axis_mask))
+ {
+ num_add_axis++;
+ }
}
- auto begin_node = loco::must_cast<luci::CircleConst *>(node->begin());
- auto end_node = loco::must_cast<luci::CircleConst *>(node->end());
- auto strides_node = loco::must_cast<luci::CircleConst *>(node->strides());
+ // Calculate the dims of input after adding new axises.
+ const uint32_t effective_dims = op_context->input_dims + num_add_axis;
+
+ // If begin, end and strides are not fully provided, it means Ellipsis should
+ // be expanded to multiple dimensions (Ex: for spec [Ellipsis, 2] on a 3D
+ // input, the Ellipsis should be applied for the first 2 dimensions). Besides,
+ // If the new_axis_mask and the ellipsis_mask are set at the same index, the
+ // new_axis_mask will have no effect.
+ int32_t effective_ellipsis_mask = 0, effective_new_axis_mask = 0;
+ uint32_t ellipsis_start_idx = effective_dims, expanded_ellipsis = 0;
+ for (uint32_t i = 0; i < effective_dims;)
+ {
+ if ((1 << i) & op_context->params.ellipsis_mask)
+ {
+ ellipsis_start_idx = i;
+ uint32_t ellipsis_end_idx =
+ std::max(i + 1, std::min(i + 1 + num_add_axis + op_context->input_dims - begin_count,
+ effective_dims));
+ expanded_ellipsis = ellipsis_end_idx - ellipsis_start_idx - 1;
+
+ // Set bit for effective_ellipsis_mask.
+ for (; i < ellipsis_end_idx; ++i)
+ {
+ effective_ellipsis_mask |= (1 << i);
+ }
+ continue;
+ }
- uint32_t dims_count = begin_node->size<S32>();
+ if ((1 << (i - expanded_ellipsis)) & op_context->params.new_axis_mask)
+ {
+ effective_new_axis_mask |= (1 << i);
+ }
+ ++i;
+ }
- op_params.start_indices_count = dims_count;
- op_params.stop_indices_count = dims_count;
- op_params.strides_count = dims_count;
+ // Calculate effective_input_shape and its corresponding begin, end, strides.
+ loco::TensorShape input_shape = luci::shape_get(op_context->input).as<loco::TensorShape>();
+ uint32_t added_ellipsis = 0, added_axises = 0;
+ op_context->effective_input_shape.rank(effective_dims);
- for (uint32_t i = 0; i < dims_count; ++i)
+ for (uint32_t i = 0; i < effective_dims; ++i)
{
- op_params.start_indices[i] = begin_node->at<S32>(i);
- op_params.stop_indices[i] = end_node->at<S32>(i);
- op_params.strides[i] = strides_node->at<S32>(i);
+ if ((1 << i) & effective_ellipsis_mask)
+ {
+ // If ellipsis_mask, set the begin_mask and end_mask at that index.
+ added_ellipsis = std::max(0u, i - ellipsis_start_idx);
+ op_params.begin_mask |= (1 << i);
+ op_params.end_mask |= (1 << i);
+ op_params.strides[i] = 1;
+ op_context->effective_input_shape.dim(i) = input_shape.dim(i - added_axises);
+ }
+ else if ((1 << i) & effective_new_axis_mask)
+ {
+ // If new_axis_mask is set, it is equivalent to adding a new dim of 1 to
+ // input tensor. Store added shape to effective_input_shape.
+ op_params.start_indices[i] = 0;
+ op_params.stop_indices[i] = 1;
+ op_params.strides[i] = 1;
+ op_context->effective_input_shape.dim(i) = loco::Dimension(1);
+ added_axises++;
+ }
+ else if (i >= begin_count + expanded_ellipsis)
+ {
+ op_params.start_indices[i] = 0;
+ op_params.stop_indices[i] = 0;
+ op_params.strides[i] = 1;
+ op_params.begin_mask |= (1 << i);
+ op_params.end_mask |= (1 << i);
+ op_context->effective_input_shape.dim(i) = input_shape.dim(i - added_axises);
+ }
+ else
+ {
+ const uint32_t orig_idx = i - added_ellipsis;
+ op_params.start_indices[i] = op_context->begin->at<S32>(orig_idx);
+ op_params.stop_indices[i] = op_context->end->at<S32>(orig_idx);
+ op_params.strides[i] = op_context->strides->at<S32>(orig_idx);
+ if (op_context->params.begin_mask & (1 << orig_idx))
+ {
+ op_params.begin_mask |= (1 << i);
+ }
+ if (op_context->params.end_mask & (1 << orig_idx))
+ {
+ op_params.end_mask |= (1 << i);
+ }
+ if (op_context->params.shrink_axis_mask & (1 << orig_idx))
+ {
+ op_params.shrink_axis_mask |= (1 << i);
+ }
+ op_context->effective_input_shape.dim(i) = input_shape.dim(i - added_axises);
+ }
}
-
- op_params.begin_mask = node->begin_mask();
- op_params.ellipsis_mask = 0;
- op_params.end_mask = node->end_mask();
- op_params.new_axis_mask = 0;
- op_params.shrink_axis_mask = node->shrink_axis_mask();
+ op_params.start_indices_count = effective_dims;
+ op_params.stop_indices_count = effective_dims;
+ op_params.strides_count = effective_dims;
return op_params;
}
@@ -241,55 +373,54 @@ loco::TensorShape infer_output_shape(const CircleStridedSlice *node)
LUCI_ASSERT(end_node->dtype() == S32, "Only support S32 for end_node");
LUCI_ASSERT(strides_node->dtype() == S32, "Only support S32 for strides_node");
- assert(node->ellipsis_mask() == 0);
- assert(node->new_axis_mask() == 0);
+ LUCI_ASSERT(begin_node->rank() == 1, "Only support rank 1 for begin_node");
+ LUCI_ASSERT(end_node->rank() == 1, "Only support rank 1 for end_node");
+ LUCI_ASSERT(strides_node->rank() == 1, "Only support rank 1 for strides_node");
- auto op_params = BuildStridedSliceParams(node);
loco::TensorShape input_shape = luci::shape_get(input_node).as<loco::TensorShape>();
- uint32_t num_input_axes = input_shape.rank();
- assert(begin_node->size<S32>() <= num_input_axes);
- assert(end_node->size<S32>() <= num_input_axes);
- assert(strides_node->size<S32>() <= num_input_axes);
- for (uint32_t i = 0; i < strides_node->size<S32>(); i++)
- {
- LUCI_ASSERT(strides_node->at<S32>(i) != 0, "Stride value has to be non-zero");
- }
+ assert(begin_node->size<S32>() <= input_shape.rank());
+ assert(end_node->size<S32>() <= input_shape.rank());
+ assert(strides_node->size<S32>() <= input_shape.rank());
- uint32_t shape_size = 0;
- std::array<int32_t, 16> output_shape_data;
+ StridedSliceContext op_context(node);
+ auto op_params = BuildStridedSliceParams(&op_context);
+ auto effective_input_shape = op_context.effective_input_shape;
+ std::vector<int32_t> output_shape_vector;
- for (uint32_t idx = 0; idx < num_input_axes; ++idx)
+ for (int32_t idx = effective_input_shape.rank() - 1; idx >= 0; --idx)
{
- int32_t begin = StartForAxis(op_params, input_shape, idx);
- int32_t end = StopForAxis(op_params, input_shape, idx, begin);
- if (end < 0)
- end = input_shape.dim(idx).value() + end + 1;
+ int32_t stride = op_params.strides[idx];
+ LUCI_ASSERT(stride != 0, "stride value has to be non-zero");
- // This is valid for both positive and negative strides
- int32_t stride = strides_node->at<S32>(idx);
- int32_t dim_shape = std::ceil(static_cast<float>(end - begin) / stride);
- assert(dim_shape > 0);
+ int32_t begin = StartForAxis(op_params, effective_input_shape, idx);
+ int32_t end = StopForAxis(op_params, effective_input_shape, idx, begin);
// When shrinking an axis, the end position does not matter (and can be
// incorrect when negative indexing is used, see Issue #19260). Always use
// begin + 1 to generate a length 1 slice, since begin has
- // already been adjusted for negative indices by StartForAxis.
- const bool shrink_axis = node->shrink_axis_mask() & (1 << idx);
+// already been adjusted for negative indices by StartForAxis.
+ const bool shrink_axis = op_params.shrink_axis_mask & (1 << idx);
if (shrink_axis)
{
- assert(dim_shape == 1);
+ end = begin + 1;
}
- else
+
+ // This is valid for both positive and negative strides
+ int32_t dim_shape = std::ceil((end - begin) / static_cast<float>(stride));
+ dim_shape = dim_shape < 0 ? 0 : dim_shape;
+ if (!shrink_axis)
{
- output_shape_data[shape_size++] = dim_shape;
+ output_shape_vector.push_back(dim_shape);
}
}
+ auto shape_size = output_shape_vector.size();
output_shape.rank(shape_size);
for (uint32_t idx = 0; idx < shape_size; ++idx)
{
- output_shape.dim(idx) = output_shape_data[idx];
+ // reverse copy
+ output_shape.dim(idx) = output_shape_vector.at(shape_size - 1u - idx);
}
return output_shape;
diff --git a/compiler/luci/tests/test.lst b/compiler/luci/tests/test.lst
index 94e723f21..09a25ff08 100644
--- a/compiler/luci/tests/test.lst
+++ b/compiler/luci/tests/test.lst
@@ -39,6 +39,7 @@ addread(Conv2D_003)
addread(Conv2D_U8_000)
addread(Conv2D_U8_001)
addread(Cos_000)
+addread(Densify_000)
addread(DepthToSpace_000)
addread(DepthwiseConv2D_000)
addread(DepthwiseConv2D_U8_000)
@@ -265,6 +266,7 @@ addwrite(Conv2D_003)
addwrite(Conv2D_U8_000)
addwrite(Conv2D_U8_001)
addwrite(Cos_000)
+addwrite(Densify_000)
addwrite(DepthToSpace_000)
addwrite(DepthwiseConv2D_000)
addwrite(DepthwiseConv2D_U8_000)
diff --git a/compiler/mio-circle04/include/mio_circle/Helper.h b/compiler/mio-circle04/include/mio_circle/Helper.h
index d3ffc23e5..7a1ba2b2f 100644
--- a/compiler/mio-circle04/include/mio_circle/Helper.h
+++ b/compiler/mio-circle04/include/mio_circle/Helper.h
@@ -19,6 +19,8 @@
#include <mio/circle/schema_generated.h>
+#include <vector>
+
namespace mio
{
namespace circle
@@ -31,6 +33,21 @@ std::string opcode_name(const ::circle::OperatorCode *opcode);
const char *tensor_type(const ::circle::Tensor *tensor);
const char *tensor_name(const ::circle::Tensor *tensor);
+template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T> *flat_array)
+{
+ if (flat_array == nullptr)
+ {
+ throw std::runtime_error("flat array is nullptr");
+ }
+
+ std::vector<T> ret(flat_array->Length());
+ for (uint32_t i = 0; i < flat_array->Length(); i++)
+ {
+ ret[i] = flat_array->Get(i);
+ }
+ return ret;
+}
+
} // namespace circle
} // namespace mio
diff --git a/compiler/mio-circle04/include/mio_circle/Reader.h b/compiler/mio-circle04/include/mio_circle/Reader.h
new file mode 100644
index 000000000..630646732
--- /dev/null
+++ b/compiler/mio-circle04/include/mio_circle/Reader.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MIO_CIRCLE04_READER_H__
+#define __MIO_CIRCLE04_READER_H__
+
+#include <mio/circle/schema_generated.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+// NOTE Reader class originated from circledump and for circle-tensordump
+// where this class has more work to be done for stability
+// as the tools are for developers not customers.
+
+namespace mio
+{
+namespace circle
+{
+
+/**
+ * @brief Loads Circle file and provides helpers to access attributes
+ */
+class Reader
+{
+private:
+ using CircleSubGraphs_t = flatbuffers::Vector<flatbuffers::Offset<::circle::SubGraph>>;
+ using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Buffer>>;
+ using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Tensor>>;
+ using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Operator>>;
+ using CircleMetadata_t = flatbuffers::Vector<flatbuffers::Offset<::circle::Metadata>>;
+ using CircleSignatureDef_t = flatbuffers::Vector<flatbuffers::Offset<::circle::SignatureDef>>;
+
+public:
+ Reader(const ::circle::Model *model);
+
+ Reader() = delete;
+
+public:
+ uint32_t version() const { return _version; }
+
+ const std::vector<const ::circle::OperatorCode *> &opcodes() { return _op_codes; }
+ const CircleBuffers_t *buffers() { return _buffers; }
+ const CircleTensors_t *tensors() { return _tensors; }
+ const CircleOperators_t *operators() { return _operators; }
+ const std::vector<int32_t> &inputs() const { return _inputs; }
+ const std::vector<int32_t> &outputs() const { return _outputs; }
+ const ::circle::DataFormat &data_format() const { return _data_format; }
+ const CircleMetadata_t *metadata() const { return _metadata; }
+ const CircleSignatureDef_t *signature_defs() const { return _signature_defs; }
+
+ uint32_t num_subgraph() const { return _subgraphs->Length(); }
+
+ size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data);
+ ::circle::BuiltinOperator builtin_code(const ::circle::Operator *op) const;
+ std::string opcode_name(const ::circle::Operator *op) const;
+ std::vector<int32_t> outputs(const ::circle::Operator *op) const;
+ std::string tensor_name(const ::circle::Tensor *tensor) const;
+ std::string tensor_dtype(const ::circle::Tensor *tensor) const;
+
+public:
+ bool select_subgraph(uint32_t subgraph);
+ const std::string &subgraph_name(void) const { return _subgraph_name; }
+ uint32_t subgraph_index(void) const { return _subgraph_index; }
+
+private:
+ uint32_t _version;
+
+ const CircleSubGraphs_t *_subgraphs{nullptr};
+ const CircleBuffers_t *_buffers{nullptr};
+ const CircleTensors_t *_tensors{nullptr};
+ const CircleOperators_t *_operators{nullptr};
+ const CircleMetadata_t *_metadata{nullptr};
+ const CircleSignatureDef_t *_signature_defs{nullptr};
+
+ uint32_t _subgraph_index = 0;
+ std::string _subgraph_name;
+ std::vector<const ::circle::OperatorCode *> _op_codes;
+ std::vector<int32_t> _inputs;
+ std::vector<int32_t> _outputs;
+ ::circle::DataFormat _data_format = ::circle::DataFormat::DataFormat_CHANNELS_FIRST;
+};
+
+} // namespace circle
+} // namespace mio
+
+#endif // __MIO_CIRCLE04_READER_H__
diff --git a/compiler/mio-circle04/src/Reader.cpp b/compiler/mio-circle04/src/Reader.cpp
new file mode 100644
index 000000000..880ffaec8
--- /dev/null
+++ b/compiler/mio-circle04/src/Reader.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_circle/Reader.h"
+#include "mio_circle/Helper.h"
+
+#include <sstream>
+#include <string>
+
+namespace mio
+{
+namespace circle
+{
+
+Reader::Reader(const ::circle::Model *model)
+{
+ if (model == nullptr)
+ {
+ throw std::runtime_error("Invalid model");
+ }
+
+ _version = model->version();
+ _subgraphs = model->subgraphs();
+ _buffers = model->buffers();
+ _metadata = model->metadata();
+ _signature_defs = model->signature_defs();
+
+ auto opcodes = model->operator_codes();
+ for (const ::circle::OperatorCode *opcode : *opcodes)
+ {
+ _op_codes.push_back(opcode);
+ }
+}
+
+size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data)
+{
+ if (buff_data != nullptr)
+ {
+ *buff_data = nullptr;
+ }
+
+ if (buf_idx == 0)
+ return 0;
+
+ if (auto *buffer = (*_buffers)[buf_idx])
+ {
+ if (auto *array = buffer->data())
+ {
+ if (size_t size = array->size())
+ {
+ if (buff_data != nullptr)
+ {
+ *buff_data = reinterpret_cast<const uint8_t *>(array->data());
+ }
+ return size;
+ }
+ }
+ }
+
+ return 0;
+}
+
+::circle::BuiltinOperator Reader::builtin_code(const ::circle::Operator *op) const
+{
+ uint32_t index = op->opcode_index();
+ assert(index < _op_codes.size());
+ const ::circle::OperatorCode *opcode = _op_codes.at(index);
+
+ return mio::circle::builtin_code_neutral(opcode);
+}
+
+std::string Reader::opcode_name(const ::circle::Operator *op) const
+{
+ uint32_t index = op->opcode_index();
+ assert(index < _op_codes.size());
+ const ::circle::OperatorCode *opcode = _op_codes.at(index);
+
+ if (!mio::circle::is_valid(opcode))
+ {
+ std::ostringstream oss;
+ oss << "(invalid: " << index << ")";
+ return oss.str();
+ }
+
+ return mio::circle::opcode_name(opcode);
+}
+
+std::vector<int32_t> Reader::outputs(const ::circle::Operator *op) const
+{
+ return as_index_vector(op->outputs());
+}
+
+std::string Reader::tensor_name(const ::circle::Tensor *tensor) const
+{
+ return mio::circle::tensor_name(tensor);
+}
+
+std::string Reader::tensor_dtype(const ::circle::Tensor *tensor) const
+{
+ return mio::circle::tensor_type(tensor);
+}
+
+bool Reader::select_subgraph(uint32_t sgindex)
+{
+ _subgraph_index = sgindex;
+ _tensors = nullptr;
+ _operators = nullptr;
+
+ _inputs.clear();
+ _outputs.clear();
+
+ if (_subgraphs->Length() <= sgindex)
+ {
+ assert(false);
+ return false;
+ }
+
+ const ::circle::SubGraph *subgraph = (*_subgraphs)[sgindex];
+
+ auto name = subgraph->name();
+ _subgraph_name = name ? name->c_str() : "(noname)";
+
+ _tensors = subgraph->tensors();
+ _operators = subgraph->operators();
+ _data_format = subgraph->data_format();
+
+ _inputs = as_index_vector(subgraph->inputs());
+ _outputs = as_index_vector(subgraph->outputs());
+
+ return true;
+}
+
+} // namespace circle
+} // namespace mio
diff --git a/compiler/mio-circle04/src/Reader.test.cpp b/compiler/mio-circle04/src/Reader.test.cpp
new file mode 100644
index 000000000..104454a62
--- /dev/null
+++ b/compiler/mio-circle04/src/Reader.test.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_circle/Reader.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <gtest/gtest.h>
+
+class mio_circle04_reader_test : public ::testing::Test
+{
+protected:
+ void initialization_emty(void)
+ {
+ _model = circle::CreateModelDirect(_fbb, 0, &_opcodes_vec);
+ circle::FinishModelBuffer(_fbb, _model);
+ }
+
+ const circle::Model *circleModel(void)
+ {
+ auto ptr = _fbb.GetBufferPointer();
+ return circle::GetModel(ptr);
+ }
+
+private:
+ flatbuffers::FlatBufferBuilder _fbb;
+ flatbuffers::Offset<circle::Model> _model;
+ std::vector<flatbuffers::Offset<circle::OperatorCode>> _opcodes_vec;
+};
+
+TEST_F(mio_circle04_reader_test, null_Model_NEG)
+{
+ EXPECT_THROW(mio::circle::Reader reader(nullptr), std::runtime_error);
+}
+
+TEST_F(mio_circle04_reader_test, empty_Model)
+{
+ initialization_emty();
+
+ const circle::Model *model = circleModel();
+ EXPECT_NE(nullptr, model);
+
+ mio::circle::Reader reader(model);
+
+ SUCCEED();
+}
+
+// TODO add more tests
diff --git a/compiler/mio-tflite/README.md b/compiler/mio-tflite/README.md
index 187b1a5c6..c717ab877 100644
--- a/compiler/mio-tflite/README.md
+++ b/compiler/mio-tflite/README.md
@@ -1,3 +1,5 @@
# mio-tflite
_mio-tflite_ provides a library to access TensorFlow lite model files
+
+NOTE: _mio-tflite_ is currently obsolete
diff --git a/compiler/mio-tflite260/README.md b/compiler/mio-tflite260/README.md
index 970569b47..86d2998ed 100644
--- a/compiler/mio-tflite260/README.md
+++ b/compiler/mio-tflite260/README.md
@@ -1,3 +1,5 @@
# mio-tflite260
_mio-tflite260_ provides a library to access TensorFlow lite model files with V2.6.0.
+
+NOTE: _mio-tflite260_ is currently obsolete
diff --git a/compiler/mir/include/mir/Graph.h b/compiler/mir/include/mir/Graph.h
index bf94cfb14..37bfdb361 100644
--- a/compiler/mir/include/mir/Graph.h
+++ b/compiler/mir/include/mir/Graph.h
@@ -103,6 +103,10 @@ private:
/**
* @brief Returns nodes of the graph sorted topologically.
+ * @note Sorting order priority
+ * 1) Graph input node (input index order)
+ * 2) Constant node (unordered - cannot predict order)
+ * 3) Ready node (unordered - cannot predict order)
*/
std::vector<Operation *> getSortedNodes(Graph *graph);
diff --git a/compiler/mir/src/Graph.cpp b/compiler/mir/src/Graph.cpp
index 04b005de4..05d6dc9bd 100644
--- a/compiler/mir/src/Graph.cpp
+++ b/compiler/mir/src/Graph.cpp
@@ -44,9 +44,16 @@ std::vector<Operation *> getSortedNodes(Graph *graph)
std::deque<Operation *> ready_nodes;
std::unordered_map<Operation *, std::size_t> num_visited_input_edges;
+ // Use input vector first to maintain correct input order
+ for (Operation *op : graph->getInputs())
+ {
+ ready_nodes.push_back(op);
+ }
+
for (Operation *op : graph->getNodes())
{
- if (op->getNumInputs() == 0)
+ // Skip already pushed input node
+ if ((op->getNumInputs() == 0) && (op->getType() != Operation::Type::input))
{
ready_nodes.push_back(op);
}
diff --git a/compiler/mir2loco/src/mir2loco.test.cpp b/compiler/mir2loco/src/mir2loco.test.cpp
index 92ab99488..244c92aa8 100644
--- a/compiler/mir2loco/src/mir2loco.test.cpp
+++ b/compiler/mir2loco/src/mir2loco.test.cpp
@@ -383,28 +383,49 @@ TEST_F(TestTransformer_mir2loco, Conv2D_Test)
auto loco_graph = transformer.transform(&mir_graph);
loco::Pull *pull_node = dynamic_cast<loco::Pull *>(loco_graph->nodes()->at(0));
- loco::ConstGen *const_node = dynamic_cast<loco::ConstGen *>(loco_graph->nodes()->at(1));
- loco::FeatureEncode *encode_node =
- dynamic_cast<loco::FeatureEncode *>(loco_graph->nodes()->at(2));
- loco::FilterEncode *filter_node = dynamic_cast<loco::FilterEncode *>(loco_graph->nodes()->at(3));
- loco::Conv2D *conv_node = dynamic_cast<loco::Conv2D *>(loco_graph->nodes()->at(4));
- loco::FeatureDecode *decode_node =
- dynamic_cast<loco::FeatureDecode *>(loco_graph->nodes()->at(5));
- loco::Push *push_node = dynamic_cast<loco::Push *>(loco_graph->nodes()->at(6));
-
ASSERT_NE(pull_node, nullptr);
+
+ // ConstGen: Only one ConstGen node
+ // We can convince that this node is input of FilterEncode because this is only ConstGen node
+ loco::ConstGen *const_node = dynamic_cast<loco::ConstGen *>(loco_graph->nodes()->at(1));
ASSERT_NE(const_node, nullptr);
- ASSERT_NE(filter_node, nullptr);
+
+ // FeatureEncode
+ auto pull_uses = loco::succs(pull_node);
+ ASSERT_EQ(pull_uses.size(), 1);
+ loco::FeatureEncode *encode_node = dynamic_cast<loco::FeatureEncode *>(*pull_uses.begin());
ASSERT_NE(encode_node, nullptr);
- ASSERT_NE(conv_node, nullptr);
- ASSERT_NE(decode_node, nullptr);
- ASSERT_NE(push_node, nullptr);
ASSERT_EQ(encode_node->input(), pull_node);
- ASSERT_EQ(filter_node->input(), const_node);
+
+ // Conv2D
+ auto encode_uses = loco::succs(encode_node);
+ ASSERT_EQ(encode_uses.size(), 1);
+ loco::Conv2D *conv_node = dynamic_cast<loco::Conv2D *>(*encode_uses.begin());
+ ASSERT_NE(conv_node, nullptr);
ASSERT_EQ(conv_node->ifm(), encode_node);
+
+ // FilterEncode
+ auto const_uses = loco::succs(const_node);
+ ASSERT_EQ(const_uses.size(), 1);
+ loco::FilterEncode *filter_node = dynamic_cast<loco::FilterEncode *>(*const_uses.begin());
+ ASSERT_NE(filter_node, nullptr);
+ ASSERT_EQ(filter_node->input(), const_node);
ASSERT_EQ(conv_node->ker(), filter_node);
+
+ // FeatureDecode
+ auto conv_uses = loco::succs(conv_node);
+ ASSERT_EQ(conv_uses.size(), 1);
+ loco::FeatureDecode *decode_node = dynamic_cast<loco::FeatureDecode *>(*conv_uses.begin());
+ ASSERT_NE(decode_node, nullptr);
ASSERT_EQ(decode_node->input(), conv_node);
+
+ // Push
+ auto decode_uses = loco::succs(decode_node);
+ ASSERT_EQ(decode_uses.size(), 1);
+ loco::Push *push_node = dynamic_cast<loco::Push *>(*decode_uses.begin());
+ ASSERT_NE(push_node, nullptr);
ASSERT_EQ(push_node->from(), decode_node);
+
// Check params
ASSERT_EQ(conv_node->pad()->top(), 5);
ASSERT_EQ(conv_node->pad()->left(), 9);
diff --git a/compiler/moco/import/src/Importer.cpp b/compiler/moco/import/src/Importer.cpp
index 333f0f6a9..0659fd165 100644
--- a/compiler/moco/import/src/Importer.cpp
+++ b/compiler/moco/import/src/Importer.cpp
@@ -190,7 +190,7 @@ std::unique_ptr<loco::Graph> Importer::import(const ModelSignature &signature,
convert_graph(*source_ptr, signature, tf_graph_def, graph.get());
- return std::move(graph);
+ return graph;
}
} // namespace moco
diff --git a/compiler/moco/lang/src/IR/TFNode.cpp b/compiler/moco/lang/src/IR/TFNode.cpp
index 55c0e0c64..b59a505b5 100644
--- a/compiler/moco/lang/src/IR/TFNode.cpp
+++ b/compiler/moco/lang/src/IR/TFNode.cpp
@@ -17,6 +17,7 @@
#include "moco/IR/TFNode.h"
#include "moco/IR/TFDialect.h"
+#include <limits>
#include <memory>
#include <cassert>
diff --git a/compiler/one-cmds/CMakeLists.txt b/compiler/one-cmds/CMakeLists.txt
index 8732340ae..90e989a00 100644
--- a/compiler/one-cmds/CMakeLists.txt
+++ b/compiler/one-cmds/CMakeLists.txt
@@ -8,7 +8,9 @@ set(ONE_COMMAND_FILES
one-optimize
one-quantize
one-pack
+ one-partition
one-profile
+ one-infer
one-codegen
one-prepare-venv
onecc
@@ -74,7 +76,11 @@ endforeach(ONE_UTILITY)
# make python directory
set(ONE_PYTHON_FILES constant.py
- make_cmd.py)
+ make_cmd.py
+ CfgRunner.py
+ OptionBuilder.py
+ TopologicalSortHelper.py
+ WorkflowRunner.py)
foreach(ONE_PYTHON_FILE IN ITEMS ${ONE_PYTHON_FILES})
diff --git a/compiler/one-cmds/dummy-driver/CMakeLists.txt b/compiler/one-cmds/dummy-driver/CMakeLists.txt
index 690a60776..2552a02db 100644
--- a/compiler/one-cmds/dummy-driver/CMakeLists.txt
+++ b/compiler/one-cmds/dummy-driver/CMakeLists.txt
@@ -1,16 +1,25 @@
# dummy driver for interface test
set(DUMMY_DRIVER_SRC src/dummy-compile.cpp)
set(HELP_DRIVER_SRC src/help-compile.cpp)
+set(DUMMY_INFER_SRC src/dummy-infer.cpp)
+set(DUMMY_INFER_V2_SRC src/dummy-inferV2.cpp)
+set(HELP_INFER_SRC src/help-infer.cpp)
set(DUMMY_PROFILE_SRC src/dummy-profile.cpp)
set(HELP_PROFILE_SRC src/help-profile.cpp)
add_executable(dummy-compile ${DUMMY_DRIVER_SRC})
add_executable(help-compile ${HELP_DRIVER_SRC})
+add_executable(dummy-infer ${DUMMY_INFER_SRC})
+add_executable(dummy-inferV2 ${DUMMY_INFER_V2_SRC})
+add_executable(help-infer ${HELP_INFER_SRC})
add_executable(dummy-profile ${DUMMY_PROFILE_SRC})
add_executable(help-profile ${HELP_PROFILE_SRC})
set(DUMMY_DRIVER "${CMAKE_CURRENT_BINARY_DIR}/dummy-compile")
set(HELP_DRIVER "${CMAKE_CURRENT_BINARY_DIR}/help-compile")
+set(DUMMY_INFER "${CMAKE_CURRENT_BINARY_DIR}/dummy-infer")
+set(DUMMY_INFER_V2 "${CMAKE_CURRENT_BINARY_DIR}/dummy-inferV2")
+set(HELP_INFER "${CMAKE_CURRENT_BINARY_DIR}/help-infer")
set(DUMMY_PROFILE "${CMAKE_CURRENT_BINARY_DIR}/dummy-profile")
set(HELP_PROFILE "${CMAKE_CURRENT_BINARY_DIR}/help-profile")
@@ -26,6 +35,24 @@ install(FILES ${HELP_DRIVER}
WORLD_READ WORLD_EXECUTE
DESTINATION test)
+install(FILES ${DUMMY_INFER}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
+install(FILES ${DUMMY_INFER_V2}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
+install(FILES ${HELP_INFER}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION test)
+
install(FILES ${DUMMY_PROFILE}
PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
GROUP_READ GROUP_EXECUTE
diff --git a/compiler/one-cmds/dummy-driver/src/dummy-infer.cpp b/compiler/one-cmds/dummy-driver/src/dummy-infer.cpp
new file mode 100644
index 000000000..60f5faefa
--- /dev/null
+++ b/compiler/one-cmds/dummy-driver/src/dummy-infer.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * dummy-infer only tests its interface rather than its functionality.
+ *
+ * ./dummy-infer ${INPUT_NAME}
+ * dummy-infer dummy output!!!
+ */
+
+#include <iostream>
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ return EXIT_FAILURE;
+
+ std::cout << "dummy-infer dummy output!!!" << std::endl;
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/one-cmds/dummy-driver/src/dummy-inferV2.cpp b/compiler/one-cmds/dummy-driver/src/dummy-inferV2.cpp
new file mode 100644
index 000000000..4b93c70a3
--- /dev/null
+++ b/compiler/one-cmds/dummy-driver/src/dummy-inferV2.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * dummy-infer only tests its interface rather than its functionality.
+ *
+ * ./dummy-infer ${INPUT_NAME}
+ * Do inference of ${INPUT_NAME}
+ */
+
+#include <iostream>
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ return EXIT_FAILURE;
+
+ std::cout << "Do inference of " + std::string(argv[1]) << std::endl;
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/one-cmds/dummy-driver/src/help-infer.cpp b/compiler/one-cmds/dummy-driver/src/help-infer.cpp
new file mode 100644
index 000000000..821d368d4
--- /dev/null
+++ b/compiler/one-cmds/dummy-driver/src/help-infer.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * help-infer prints dummy help message.
+ *
+ * $ ./help-infer -h
+ * HELP MESSAGE!!
+ */
+
+#include <iostream>
+#include <fstream>
+#include <string>
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ return EXIT_FAILURE;
+
+ std::string opt_h{"-h"};
+ std::string argv_1{argv[1]};
+
+ if (opt_h != argv_1)
+ return EXIT_FAILURE;
+
+ std::cout << "HELP MESSAGE!!" << std::endl;
+
+ return EXIT_SUCCESS;
+}
diff --git a/compiler/one-cmds/how-to-use-one-commands.txt b/compiler/one-cmds/how-to-use-one-commands.txt
index ebc165167..2352bbd7a 100644
--- a/compiler/one-cmds/how-to-use-one-commands.txt
+++ b/compiler/one-cmds/how-to-use-one-commands.txt
@@ -153,6 +153,7 @@ Current transformation options are
- expand_broadcast_const : This will expand broadcastable constant node inputs
- fold_add_v2 : This removes AddV2 operation which can be folded
- fold_cast : This removes Cast operation which can be folded
+- fold_densify : This removes Densify operator which can be folded
- fold_dequantize : This removes Dequantize operation which can be folded
- fold_dwconv : This folds Depthwise Convolution operation which can be folded
- fold_gather : This removes Gather operation which can be folded
@@ -205,10 +206,6 @@ Current transformation options are
- transform_min_max_to_relu6: This will transform Minimum-Maximum pattern to Relu6 operator.
- transform_min_relu_to_relu6: This will transform Minimum(6)-Relu pattern to Relu6 operator.
-There are options to enable multiple options at once for convenience.
-- O1: fuse_bcq, fuse_instnorm, resolve_customop_add, resolve_customop_batchmatmul,
- resolve_customop_matmul, remove_redundant_transpose, substitute_pack_to_reshape
-
one-quantize
------------
diff --git a/compiler/one-cmds/one-build b/compiler/one-cmds/one-build
index 5c313b44b..4b1f98070 100644
--- a/compiler/one-cmds/one-build
+++ b/compiler/one-cmds/one-build
@@ -22,7 +22,6 @@
import argparse
import configparser
import os
-import subprocess
import sys
import utils as _utils
@@ -83,6 +82,7 @@ def _get_driver_name(driver_name):
'one-import-onnx': 'one-import-onnx',
'one-optimize': 'one-optimize',
'one-quantize': 'one-quantize',
+ 'one-partition': 'one-partition',
'one-pack': 'one-pack',
'one-codegen': 'one-codegen'
}[driver_name]
@@ -157,7 +157,8 @@ def main():
bin_dir = os.path.dirname(os.path.realpath(__file__))
import_drivers_dict = _utils._detect_one_import_drivers(bin_dir)
transform_drivers = [
- 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile'
+ 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile',
+ 'one-partition'
]
_verify_cfg(import_drivers_dict, config)
diff --git a/compiler/one-cmds/one-build.template.cfg b/compiler/one-cmds/one-build.template.cfg
index e147896ef..42960811e 100644
--- a/compiler/one-cmds/one-build.template.cfg
+++ b/compiler/one-cmds/one-build.template.cfg
@@ -5,6 +5,7 @@ one-import-bcq=False
one-import-onnx=False
one-optimize=True
one-quantize=False
+one-partition=False
one-pack=True
one-codegen=False
diff --git a/compiler/one-cmds/one-codegen b/compiler/one-cmds/one-codegen
index 726538d44..86e1632e6 100644
--- a/compiler/one-cmds/one-codegen
+++ b/compiler/one-cmds/one-codegen
@@ -25,9 +25,7 @@ import glob
import itertools
import ntpath
import os
-import subprocess
import sys
-import tempfile
import shutil
import utils as _utils
diff --git a/compiler/one-cmds/one-import-bcq b/compiler/one-cmds/one-import-bcq
index ef89a9297..c3ef0b275 100644
--- a/compiler/one-cmds/one-import-bcq
+++ b/compiler/one-cmds/one-import-bcq
@@ -21,7 +21,6 @@
import argparse
import os
-import subprocess
import sys
import tempfile
@@ -160,9 +159,9 @@ def _convert(args):
tmpdir,
os.path.splitext(
os.path.basename(generate_bcq_metadata_output_path))[0]) + '.tflite'
- tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(args, tf2tfliteV2_path,
- generate_bcq_metadata_output_path,
- tf2tfliteV2_output_path)
+ tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(
+ args, tf2tfliteV2_path, generate_bcq_metadata_output_path,
+ tf2tfliteV2_output_path)
try:
output_arrays_idx = tf2tfliteV2_cmd.index('--output_arrays')
tf2tfliteV2_cmd[output_arrays_idx + 1] = ','.join(bcq_output_arrays)
@@ -177,8 +176,8 @@ def _convert(args):
# make a command to convert from tflite to circle
tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
- tf2tfliteV2_output_path,
- getattr(args, 'output_path'))
+ tf2tfliteV2_output_path,
+ getattr(args, 'output_path'))
f.write((' '.join(tflite2circle_cmd) + '\n').encode())
diff --git a/compiler/one-cmds/one-import-onnx b/compiler/one-cmds/one-import-onnx
index eaa136197..ad19c2f59 100644
--- a/compiler/one-cmds/one-import-onnx
+++ b/compiler/one-cmds/one-import-onnx
@@ -21,7 +21,6 @@
import argparse
import os
-import subprocess
import sys
import tempfile
import onnx
@@ -80,6 +79,12 @@ def _get_parser():
parser.add_argument('--unroll_rnn', action='store_true', help='Unroll RNN operators')
parser.add_argument(
'--unroll_lstm', action='store_true', help='Unroll LSTM operators')
+ parser.add_argument(
+ '--keep_io_order',
+ action='store_true',
+ help=
+ 'Ensure generated circle model preserves the I/O order of the original onnx model.'
+ )
# save intermediate file(s)
parser.add_argument(
@@ -87,6 +92,12 @@ def _get_parser():
action='store_true',
help='Save intermediate files to output folder')
+ # experimental options
+ parser.add_argument(
+ '--experimental_disable_batchmatmul_unfold',
+ action='store_true',
+ help='Experimental disable BatchMatMul unfold')
+
return parser
@@ -124,6 +135,65 @@ def _apply_verbosity(verbosity):
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
+# The index of input/output is added in front of the name. For example,
+# Original input names: 'a', 'c', 'b'
+# Renamed: '0001_a', '0002_c', '0003_b'
+# This will preserve I/O order after import.
+def _remap_io_names(onnx_model):
+ # gather existing name of I/O and generate new name of I/O in sort order
+ input_nodes = []
+ output_nodes = []
+ remap_inputs = []
+ remap_outputs = []
+ initializers = []
+ # some models may have initializers as inputs. ignore them.
+ for initializer in onnx_model.graph.initializer:
+ initializers.append(initializer.name)
+ for idx in range(0, len(onnx_model.graph.input)):
+ name = onnx_model.graph.input[idx].name
+ if not name in initializers:
+ input_nodes.append(name)
+ remap_inputs.append(format(idx + 1, '04d') + '_' + name)
+ for idx in range(0, len(onnx_model.graph.output)):
+ name = onnx_model.graph.output[idx].name
+ output_nodes.append(name)
+ remap_outputs.append(format(idx + 1, '04d') + '_' + name)
+ # change names for graph input
+ for i in range(len(onnx_model.graph.input)):
+ if onnx_model.graph.input[i].name in input_nodes:
+ to_rename = onnx_model.graph.input[i].name
+ idx = input_nodes.index(to_rename)
+ onnx_model.graph.input[i].name = remap_inputs[idx]
+ # change names of all nodes in the graph
+ for i in range(len(onnx_model.graph.node)):
+ # check node.input is to change to remap_inputs or remap_outputs
+ for j in range(len(onnx_model.graph.node[i].input)):
+ if onnx_model.graph.node[i].input[j] in input_nodes:
+ to_rename = onnx_model.graph.node[i].input[j]
+ idx = input_nodes.index(to_rename)
+ onnx_model.graph.node[i].input[j] = remap_inputs[idx]
+ if onnx_model.graph.node[i].input[j] in output_nodes:
+ to_rename = onnx_model.graph.node[i].input[j]
+ idx = output_nodes.index(to_rename)
+ onnx_model.graph.node[i].input[j] = remap_outputs[idx]
+ # check node.output is to change to remap_inputs or remap_outputs
+ for j in range(len(onnx_model.graph.node[i].output)):
+ if onnx_model.graph.node[i].output[j] in output_nodes:
+ to_rename = onnx_model.graph.node[i].output[j]
+ idx = output_nodes.index(to_rename)
+ onnx_model.graph.node[i].output[j] = remap_outputs[idx]
+ if onnx_model.graph.node[i].output[j] in input_nodes:
+ to_rename = onnx_model.graph.node[i].output[j]
+ idx = input_nodes.index(to_rename)
+ onnx_model.graph.node[i].output[j] = remap_inputs[idx]
+ # change names for graph output
+ for i in range(len(onnx_model.graph.output)):
+ if onnx_model.graph.output[i].name in output_nodes:
+ to_rename = onnx_model.graph.output[i].name
+ idx = output_nodes.index(to_rename)
+ onnx_model.graph.output[i].name = remap_outputs[idx]
+
+
def _convert(args):
_apply_verbosity(args.verbose)
@@ -142,6 +212,13 @@ def _convert(args):
options.unroll_rnn = _utils._is_valid_attr(args, 'unroll_rnn')
options.unroll_lstm = _utils._is_valid_attr(args, 'unroll_lstm')
onnx_legalizer.legalize(onnx_model, options)
+ if _utils._is_valid_attr(args, 'keep_io_order'):
+ _remap_io_names(onnx_model)
+ if _utils._is_valid_attr(args, 'save_intermediate'):
+ basename = os.path.basename(getattr(args, 'input_path'))
+ fixed_path = os.path.join(tmpdir,
+ os.path.splitext(basename)[0] + '~.onnx')
+ onnx.save(onnx_model, fixed_path)
tf_savedmodel = onnx_tf.backend.prepare(onnx_model)
savedmodel_name = os.path.splitext(os.path.basename(
@@ -166,8 +243,8 @@ def _convert(args):
# make a command to convert from tflite to circle
tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
- tf2tfliteV2_output_path,
- getattr(args, 'output_path'))
+ tf2tfliteV2_output_path,
+ getattr(args, 'output_path'))
f.write((' '.join(tflite2circle_cmd) + '\n').encode())
diff --git a/compiler/one-cmds/one-import-pytorch b/compiler/one-cmds/one-import-pytorch
index dbf1ba6d7..7f39e61bb 100644
--- a/compiler/one-cmds/one-import-pytorch
+++ b/compiler/one-cmds/one-import-pytorch
@@ -80,7 +80,8 @@ def _get_parser():
tf2tflite_group.add_argument('--converter_version', default='v2')
parser.add_argument('--unroll_rnn', action='store_true', help='Unroll RNN operators')
- parser.add_argument('--unroll_lstm', action='store_true', help='Unroll LSTM operators')
+ parser.add_argument(
+ '--unroll_lstm', action='store_true', help='Unroll LSTM operators')
# save intermediate file(s)
parser.add_argument(
@@ -338,8 +339,8 @@ def _convert(args):
# make a command to convert from tflite to circle
tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
- tf2tfliteV2_output_path,
- getattr(args, 'output_path'))
+ tf2tfliteV2_output_path,
+ getattr(args, 'output_path'))
f.write((' '.join(tflite2circle_cmd) + '\n').encode())
diff --git a/compiler/one-cmds/one-import-tf b/compiler/one-cmds/one-import-tf
index 999255a34..6623fa6a4 100644
--- a/compiler/one-cmds/one-import-tf
+++ b/compiler/one-cmds/one-import-tf
@@ -21,8 +21,6 @@
import argparse
import os
-import subprocess
-import sys
import tempfile
import onelib.make_cmd as _make_cmd
@@ -152,8 +150,8 @@ def _convert(args):
tmpdir,
os.path.splitext(os.path.basename(args.output_path))[0]) + '.tflite'
tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(args, tf2tfliteV2_path,
- getattr(args, 'input_path'),
- tf2tfliteV2_output_path)
+ getattr(args, 'input_path'),
+ tf2tfliteV2_output_path)
f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode())
@@ -163,8 +161,8 @@ def _convert(args):
# make a command to convert from tflite to circle
tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
- tf2tfliteV2_output_path,
- getattr(args, 'output_path'))
+ tf2tfliteV2_output_path,
+ getattr(args, 'output_path'))
f.write((' '.join(tflite2circle_cmd) + '\n').encode())
diff --git a/compiler/one-cmds/one-import-tflite b/compiler/one-cmds/one-import-tflite
index 2d756bff6..3d96b117f 100644
--- a/compiler/one-cmds/one-import-tflite
+++ b/compiler/one-cmds/one-import-tflite
@@ -21,7 +21,6 @@
import argparse
import os
-import subprocess
import sys
import onelib.make_cmd as _make_cmd
@@ -83,8 +82,8 @@ def _convert(args):
# make a command to convert from tflite to circle
tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
- getattr(args, 'input_path'),
- getattr(args, 'output_path'))
+ getattr(args, 'input_path'),
+ getattr(args, 'output_path'))
f.write((' '.join(tflite2circle_cmd) + '\n').encode())
diff --git a/compiler/one-cmds/one-infer b/compiler/one-cmds/one-infer
new file mode 100644
index 000000000..c7fcd8afd
--- /dev/null
+++ b/compiler/one-cmds/one-infer
@@ -0,0 +1,224 @@
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import copy
+import glob
+import itertools
+import ntpath
+import os
+import sys
+
+import utils as _utils
+
+# TODO Find better way to suppress trackback on error
+sys.tracebacklimit = 0
+
+
+def _get_backends_list():
+ """
+ [one hierarchy]
+ one
+ ├── backends
+ ├── bin
+ ├── doc
+ ├── include
+ ├── lib
+ ├── optimization
+ └── test
+
+ The list where `one-infer` finds its backends
+ - `bin` folder where `one-infer` exists
+ - `backends` folder
+
+ NOTE If there are backends of the same name in different places,
+ the closer to the top in the list, the higher the priority.
+ """
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ backend_set = set()
+
+ # bin folder
+ files = [f for f in glob.glob(dir_path + '/*-infer')]
+ # backends folder
+ files += [f for f in glob.glob(dir_path + '/../backends/**/*-infer', recursive=True)]
+ # TODO find backends in `$PATH`
+
+ backends_list = []
+ for cand in files:
+ base = ntpath.basename(cand)
+ if (not base in backend_set) and os.path.isfile(cand) and os.access(
+ cand, os.X_OK):
+ backend_set.add(base)
+ backends_list.append(cand)
+
+ return backends_list
+
+
+def _search_backend_driver(driver):
+ """
+ [one hierarchy]
+ one
+ ├── backends
+ ├── bin
+ ├── doc
+ ├── include
+ ├── lib
+ ├── optimization
+ └── test
+
+ The list where `one-infer` finds its backend driver
+ - `bin` folder where `one-infer` exists
+ - `backends/**/bin/` folder
+
+ NOTE If there are drivers of the same name in different places,
+ the closer to the top in the list, the higher the priority.
+ """
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+
+ # CASE 1: one/bin/{driver} is found
+ driver_path = dir_path + '/' + driver
+ if os.path.isfile(driver_path) and os.access(driver_path, os.X_OK):
+ return driver_path
+
+ # CASE 2: one/backends/**/bin/{driver} is found
+ for driver_path in glob.glob(
+ dir_path + '/../backends/**/bin/' + driver, recursive=True):
+ if os.path.isfile(driver_path) and os.access(driver_path, os.X_OK):
+ return driver_path
+
+ # CASE 3: {driver} is found in nowhere
+ return None
+
+
+def _get_parser(backends_list):
+ infer_usage = 'one-infer [-h] [-v] [-C CONFIG] [-d DRIVER | -b BACKEND] [--post-process POST_PROCESS] [--] [COMMANDS FOR BACKEND DRIVER]'
+ parser = argparse.ArgumentParser(
+ description='command line tool to infer model', usage=infer_usage)
+
+ _utils._add_default_arg(parser)
+
+ # TODO: add tflite/onnx-infer driver to helper message when it is implemented
+ driver_help_message = 'backend inference driver name to execute'
+ parser.add_argument('-d', '--driver', type=str, help=driver_help_message)
+
+ # get backend list in the directory
+ backends_name = [ntpath.basename(f) for f in backends_list]
+ if not backends_name:
+ backends_name_message = '(There is no available backend drivers)'
+ else:
+ backends_name_message = '(available backend drivers: ' + ', '.join(
+ backends_name) + ')'
+ backend_help_message = 'backend name to use ' + backends_name_message
+ parser.add_argument('-b', '--backend', type=str, help=backend_help_message)
+
+ post_process_help_message = 'post processing script to convert I/O data to standard format'
+ parser.add_argument('--post-process', type=str, help=post_process_help_message)
+
+ return parser
+
+
+def _verify_arg(parser, args):
+ """verify given arguments"""
+ # `-d/--driver` and `-b/--backend` are mutually exclusive arguments.
+ if _utils._is_valid_attr(args, 'driver') and _utils._is_valid_attr(args, 'backend'):
+ parser.error(
+ '-d and -b options are mutually exclusive. Please use only one of them')
+
+ missing = []
+ if not _utils._is_valid_attr(args, 'driver') and not _utils._is_valid_attr(
+ args, 'backend'):
+ missing.append('{-d/--driver | -b/--backend}')
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+
+
+def _parse_arg(parser):
+ infer_args = []
+ backend_args = []
+ argv = copy.deepcopy(sys.argv)
+ # delete file name
+ del argv[0]
+ # split by '--'
+ args = [list(y) for x, y in itertools.groupby(argv, lambda z: z == '--') if not x]
+
+ # one-infer [-h] [-v] [-C CONFIG] [-d DRIVER] [-b BACKEND] [--post-process POST_PROCESS] -- [COMMANDS FOR BACKEND DRIVER]
+ if len(args):
+ infer_args = args[0]
+ infer_args = parser.parse_args(infer_args)
+ backend_args = backend_args if len(args) < 2 else args[1]
+ # print version
+ if len(args) and infer_args.version:
+ _utils._print_version_and_exit(__file__)
+
+ return infer_args, backend_args
+
+
+def _get_executable(args, backends_list):
+ driver = _utils._is_valid_attr(args, 'driver')
+ if driver:
+ executable = _search_backend_driver(driver)
+ if executable:
+ return executable
+ else:
+ raise FileNotFoundError(driver + ' not found')
+
+ if _utils._is_valid_attr(args, 'backend'):
+ backend_base = getattr(args, 'backend') + '-infer'
+ for cand in backends_list:
+ if ntpath.basename(cand) == backend_base:
+ return cand
+ raise FileNotFoundError(backend_base + ' not found')
+
+
+def main():
+ # get backend list
+ backends_list = _get_backends_list()
+
+ # parse arguments
+ parser = _get_parser(backends_list)
+ args, backend_args = _parse_arg(parser)
+
+ # parse configuration file
+ _utils._parse_cfg(args, 'one-infer')
+
+ # verify arguments
+ _verify_arg(parser, args)
+
+ # make a command to run given backend driver
+ driver_path = _get_executable(args, backends_list)
+ infer_cmd = [driver_path] + backend_args
+ if _utils._is_valid_attr(args, 'command'):
+ infer_cmd += getattr(args, 'command').split()
+
+ # run backend driver
+ _utils._run(infer_cmd, err_prefix=ntpath.basename(driver_path))
+
+ # run post process script if it's given
+ if _utils._is_valid_attr(args, 'post_process'):
+ # NOTE: the given python script will be executed by venv of ONE
+ python_path = sys.executable
+ post_process_command = [python_path] + getattr(args,
+ 'post_process').strip().split(' ')
+ _utils._run(post_process_command, err_prefix='one-infer')
+
+
+if __name__ == '__main__':
+ _utils._safemain(main, __file__)
diff --git a/compiler/one-cmds/one-init b/compiler/one-cmds/one-init
new file mode 100644
index 000000000..04c4534cd
--- /dev/null
+++ b/compiler/one-cmds/one-init
@@ -0,0 +1,280 @@
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import copy
+import glob
+import itertools
+import ntpath
+import os
+import sys
+
+import configparser
+import utils as _utils
+
+# TODO Find better way to suppress trackback on error
+sys.tracebacklimit = 0
+
+
+class CommentableConfigParser(configparser.ConfigParser):
+ """
+ ConfigParser where comment can be stored
+ In Python ConfigParser, comment in ini file ( starting with ';') is considered a key of which
+ value is None.
+ Ref: https://stackoverflow.com/questions/6620637/writing-comments-to-files-with-configparser
+ """
+
+ def __init__(self):
+ # allow_no_value=True to add comment
+ # ref: https://stackoverflow.com/a/19432072
+ configparser.ConfigParser.__init__(self, allow_no_value=True)
+ self.optionxform = str
+
+ def add_comment(self, section, comment):
+ comment_sign = ';'
+ self[section][f'{comment_sign} {comment}'] = None
+
+
+def _get_backends_list():
+ """
+ [one hierarchy]
+ one
+ ├── backends
+ ├── bin
+ ├── doc
+ ├── include
+ ├── lib
+ ├── optimization
+ └── test
+
+ The list where `one-init` finds its backends
+ - `bin` folder where `one-init` exists
+ - `backends` folder
+
+ NOTE If there are backends of the same name in different places,
+ the closer to the top in the list, the higher the priority.
+ """
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ backend_set = set()
+
+ # bin folder
+ files = [f for f in glob.glob(dir_path + '/*-init')]
+ # backends folder
+ files += [f for f in glob.glob(dir_path + '/../backends/**/*-init', recursive=True)]
+ # TODO find backends in `$PATH`
+
+ backends_list = []
+ for cand in files:
+ base = ntpath.basename(cand)
+ if (not base in backend_set) and os.path.isfile(cand) and os.access(
+ cand, os.X_OK):
+ backend_set.add(base)
+ backends_list.append(cand)
+
+ return backends_list
+
+
+# TODO Add support for TF graphdef and bcq
+def _get_parser(backends_list):
+ init_usage = (
+ 'one-init [-h] [-v] [-V] '
+ '[-i INPUT_PATH] '
+ '[-o OUTPUT_PATH] '
+ '[-m MODEL_TYPE] '
+ '[-b BACKEND] '
+ # args for onnx model
+ '[--convert_nchw_to_nhwc] '
+ '[--nchw_to_nhwc_input_shape] '
+ '[--nchw_to_nhwc_output_shape] '
+ # args for backend driver
+ '[--] [COMMANDS FOR BACKEND DRIVER]')
+ """
+ NOTE
+ layout options for onnx model could be difficult to users.
+ In one-init, we could consider easier args for the the above three:
+ For example, we could have another option, e.g., --input_img_layout LAYOUT
+ - When LAYOUT is NHWC, apply 'nchw_to_nhwc_input_shape=True' into cfg
+ - When LAYOUT is NCHW, apply 'nchw_to_nhwc_input_shape=False' into cfg
+ """
+
+ parser = argparse.ArgumentParser(
+ description='Command line tool to generate initial cfg file. '
+ 'Currently tflite and onnx models are supported',
+ usage=init_usage)
+
+ _utils._add_default_arg_no_CS(parser)
+
+ parser.add_argument(
+ '-i', '--input_path', type=str, help='full filepath of the input model file')
+ parser.add_argument(
+ '-o', '--output_path', type=str, help='full filepath of the output cfg file')
+ parser.add_argument(
+ '-m',
+ '--model_type',
+ type=str,
+ help=('type of input model: "onnx", "tflite". '
+ 'If the file extension passed to --input_path is '
+ '".tflite" or ".onnx", this arg can be omitted.'))
+
+ onnx_group = parser.add_argument_group('arguments when model type is onnx')
+ onnx_group.add_argument(
+ '--convert_nchw_to_nhwc',
+ action='store_true',
+ help=
+ 'Convert NCHW operators to NHWC under the assumption that input model is NCHW.')
+ onnx_group.add_argument(
+ '--nchw_to_nhwc_input_shape',
+ action='store_true',
+ help='Convert the input shape of the model (argument for convert_nchw_to_nhwc)')
+ onnx_group.add_argument(
+ '--nchw_to_nhwc_output_shape',
+ action='store_true',
+ help='Convert the output shape of the model (argument for convert_nchw_to_nhwc)')
+
+ # get backend list in the directory
+ backends_name = [ntpath.basename(f) for f in backends_list]
+ if not backends_name:
+ backends_name_message = '(There is no available backend drivers)'
+ else:
+ backends_name_message = '(available backend drivers: ' + ', '.join(
+ backends_name) + ')'
+ backend_help_message = 'backend name to use ' + backends_name_message
+ parser.add_argument('-b', '--backend', type=str, help=backend_help_message)
+
+ return parser
+
+
+def _verify_arg(parser, args):
+ # check if required arguments is given
+ missing = []
+ if not _utils._is_valid_attr(args, 'input_path'):
+ missing.append('-i/--input_path')
+ if not _utils._is_valid_attr(args, 'output_path'):
+ missing.append('-o/--output_path')
+ if not _utils._is_valid_attr(args, 'backend'):
+ missing.append('-b/--backend')
+
+ if _utils._is_valid_attr(args, 'model_type'):
+ # TODO Support model types other than onnx and tflite (e.g., TF)
+ if getattr(args, 'model_type') not in ['onnx', 'tflite']:
+ parser.error('Allowed value for --model_type: "onnx" or "tflite"')
+
+ if _utils._is_valid_attr(args, 'nchw_to_nhwc_input_shape'):
+ if not _utils._is_valid_attr(args, 'convert_nchw_to_nhwc'):
+ missing.append('--convert_nchw_to_nhwc')
+ if _utils._is_valid_attr(args, 'nchw_to_nhwc_output_shape'):
+ if not _utils._is_valid_attr(args, 'convert_nchw_to_nhwc'):
+ missing.append('--convert_nchw_to_nhwc')
+
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+
+
+def _parse_arg(parser):
+ init_args = []
+ backend_args = []
+ argv = copy.deepcopy(sys.argv)
+ # delete file name
+ del argv[0]
+ # split by '--'
+ args = [list(y) for x, y in itertools.groupby(argv, lambda z: z == '--') if not x]
+
+ # one-init [-h] [-v] ...
+ if len(args):
+ init_args = args[0]
+ init_args = parser.parse_args(init_args)
+ backend_args = backend_args if len(args) < 2 else args[1]
+ # print version
+ if len(args) and init_args.version:
+ _utils._print_version_and_exit(__file__)
+
+ return init_args, backend_args
+
+
+def _get_executable(args, backends_list):
+ if _utils._is_valid_attr(args, 'backend'):
+ backend_base = getattr(args, 'backend') + '-init'
+ for cand in backends_list:
+ if ntpath.basename(cand) == backend_base:
+ return cand
+ raise FileNotFoundError(backend_base + ' not found')
+
+
+# TODO Support workflow format (https://github.com/Samsung/ONE/pull/9354)
+def _generate():
+ # generate cfg file
+ config = CommentableConfigParser()
+
+ def _add_onecc_sections():
+ pass # NYI
+
+ def _gen_import():
+ pass # NYI
+
+ def _gen_optimize():
+ pass # NYI
+
+ def _gen_quantize():
+ pass # NYI
+
+ def _gen_codegen():
+ pass # NYI
+
+ #
+ # NYI: one-profile, one-partition, one-pack, one-infer
+ #
+
+ _add_onecc_sections()
+
+ _gen_import()
+ _gen_optimize()
+ _gen_quantize()
+ _gen_codegen()
+
+ with open(args.output_path, 'w') as f:
+ config.write(f)
+
+
+def main():
+ # get backend list
+ backends_list = _get_backends_list()
+
+ # parse arguments
+ parser = _get_parser(backends_list)
+ args, backend_args = _parse_arg(parser)
+
+ # verify arguments
+ _verify_arg(parser, args)
+
+ # make a command to run given backend driver
+ driver_path = _get_executable(args, backends_list)
+ init_cmd = [driver_path] + backend_args
+
+ # run backend driver
+ _utils._run(init_cmd, err_prefix=ntpath.basename(driver_path))
+
+ #TODO generate cfg file
+
+ raise NotImplementedError("NYI")
+
+
+if __name__ == '__main__':
+ _utils._safemain(main, __file__)
diff --git a/compiler/one-cmds/one-optimize b/compiler/one-cmds/one-optimize
index 8b1f3f7be..481fc8459 100644
--- a/compiler/one-cmds/one-optimize
+++ b/compiler/one-cmds/one-optimize
@@ -21,7 +21,6 @@
import argparse
import os
-import subprocess
import sys
import onelib.constant as _constant
@@ -83,6 +82,14 @@ def _verify_arg(parser, args):
if len(missing):
parser.error('the following arguments are required: ' + ' '.join(missing))
+ # default has pre-defined optimization options
+ default = _get_parser().parse_args()
+
+ # check if unrecognized arguments are given
+ diff = set(dir(args)) - set(dir(default))
+ if len(diff):
+ parser.error('the following arguments are unrecognized: ' + ' '.join(diff))
+
def _parse_arg(parser):
args = parser.parse_args()
@@ -102,8 +109,8 @@ def _optimize(args):
# make a command to optimize circle model
circle2circle_path = os.path.join(dir_path, 'circle2circle')
circle2circle_cmd = _make_cmd.make_circle2circle_cmd(args, circle2circle_path,
- getattr(args, 'input_path'),
- getattr(args, 'output_path'))
+ getattr(args, 'input_path'),
+ getattr(args, 'output_path'))
# verbose
if _utils._is_valid_attr(args, 'verbose'):
diff --git a/compiler/one-cmds/one-pack b/compiler/one-cmds/one-pack
index 133207de0..5cab7c737 100644
--- a/compiler/one-cmds/one-pack
+++ b/compiler/one-cmds/one-pack
@@ -21,9 +21,7 @@
import argparse
import os
-import subprocess
import sys
-import tempfile
import utils as _utils
diff --git a/compiler/one-cmds/one-partition b/compiler/one-cmds/one-partition
new file mode 100644
index 000000000..c0d71e5d9
--- /dev/null
+++ b/compiler/one-cmds/one-partition
@@ -0,0 +1,130 @@
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@" # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255 # '''
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import configparser
+import os
+import sys
+
+import utils as _utils
+
+# TODO Find better way to suppress trackback on error
+sys.tracebacklimit = 0
+
+
+def _get_parser():
+ parser = argparse.ArgumentParser(
+ description='command line tool to partition circle model by multiple backends')
+
+ _utils._add_default_arg(parser)
+
+ parser.add_argument(
+ '--backends', type=str, help='backends in CSV to use for partitioning')
+ parser.add_argument('--default', type=str, help='default backend to assign')
+
+ parser.add_argument(
+ '--part_file', type=str, help='partition file which provides backend to assign')
+ parser.add_argument('--input_file', type=str, help='input circle model filename')
+ parser.add_argument(
+ '--work_path',
+ type=str,
+ help='work path of partition, input files exist and output files are produced')
+
+ return parser
+
+
+def _parse_arg(parser):
+ args = parser.parse_args()
+ # print version
+ if args.version:
+ _utils._print_version_and_exit(__file__)
+
+ return args
+
+
+def _verify_arg(parser, args):
+ """verify given arguments"""
+ # check if required arguments is given
+ missing = []
+ if not _utils._is_valid_attr(args, 'part_file'):
+ missing.append('part_file')
+ if not _utils._is_valid_attr(args, 'input_file'):
+ missing.append('input_file')
+ if len(missing):
+ parser.error('the following arguments are required: ' + ' '.join(missing))
+ return
+
+
+def _partition(args):
+ # get file path to log
+ bin_path = os.path.dirname(os.path.realpath(__file__))
+ cur_path = os.getcwd()
+ partition_path = os.path.join(cur_path, args.part_file)
+ logfile_path = partition_path + '.log'
+
+ with open(logfile_path, 'wb', buffering=0) as f:
+ # make a command to package circle model and metadata into nnpackage
+ circle_partitioner_path = os.path.join(bin_path, 'circle-partitioner')
+
+ cmd = [os.path.expanduser(circle_partitioner_path)]
+
+ if _utils._is_valid_attr(args, 'backends'):
+ cmd.append('--backends')
+ cmd.append(getattr(args, 'backends'))
+ if _utils._is_valid_attr(args, 'default'):
+ cmd.append('--default')
+ cmd.append(getattr(args, 'default'))
+ if _utils._is_valid_attr(args, 'work_path'):
+ cmd.append('--work_path')
+ cmd.append(getattr(args, 'work_path'))
+
+ cmd.append('--part_file')
+ cmd.append(args.part_file)
+ cmd.append('--input_file')
+ cmd.append(args.input_file)
+
+ f.write((' '.join(cmd) + '\n').encode())
+
+ # run circle-partitoner
+ _utils._run(cmd, err_prefix='circle-partitioner', logfile=f)
+
+
+def main():
+ # parse arguments
+ parser = _get_parser()
+ args = _parse_arg(parser)
+
+ # parse configuration file
+ _utils._parse_cfg(args, 'one-partition')
+
+ if _utils._is_valid_attr(args, 'config'):
+ config_path = getattr(args, 'config')
+ _utils._parse_cfg_and_overwrite(config_path, 'one-partition', args)
+
+ # verify arguments
+ _verify_arg(parser, args)
+
+ # do partition
+ _partition(args)
+
+
+if __name__ == '__main__':
+ _utils._safemain(main, __file__)
diff --git a/compiler/one-cmds/one-prepare-venv b/compiler/one-cmds/one-prepare-venv
index 0f75166a7..b435671f4 100644
--- a/compiler/one-cmds/one-prepare-venv
+++ b/compiler/one-cmds/one-prepare-venv
@@ -41,6 +41,7 @@ VER_ONNX_TF=1.10.0
# Install tensorflow
PIP_TRUSTED_HOST="--trusted-host pypi.org "
+PIP_TRUSTED_HOST+="--trusted-host pypi.python.org "
PIP_TRUSTED_HOST+="--trusted-host files.pythonhost.org "
PIP_TRUSTED_HOST+="--trusted-host download.pytorch.org "
@@ -62,7 +63,8 @@ else
${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow-cpu==${VER_TENSORFLOW}
fi
${VENV_PYTHON} -m pip ${PIP_OPTIONS} install Pillow
-${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow_probability
+# TODO remove version fix, https://github.com/Samsung/ONE/issues/9240
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow_probability==0.16.0
# Install PyTorch and ONNX related
# NOTE set ONE_PREPVENV_TORCH_STABLE to override 'torch_stable.html' URL.
@@ -72,6 +74,8 @@ TORCH_STABLE_URL="https://download.pytorch.org/whl/torch_stable.html"
if [[ ! -z "$ONE_PREPVENV_TORCH_STABLE" ]]; then
TORCH_STABLE_URL="${ONE_PREPVENV_TORCH_STABLE}"
fi
+# TODO remove torch message
+echo "Torch from '${ONE_PREPVENV_TORCH_STABLE}' -> '${TORCH_STABLE_URL}'"
${VENV_PYTHON} -m pip ${PIP_OPTIONS} install torch==1.11.0+cpu -f ${TORCH_STABLE_URL}
${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx==${VER_ONNX}
@@ -84,3 +88,7 @@ if [ -n "${EXT_ONNX_TF_WHL}" ]; then
else
${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx-tf==${VER_ONNX_TF}
fi
+
+# NOTE refer https://github.com/protocolbuffers/protobuf/issues/10051
+# TODO remove this when issue is resolved
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install --upgrade protobuf==3.20.1
diff --git a/compiler/one-cmds/one-profile b/compiler/one-cmds/one-profile
index ed6d8bd7a..b19c215ed 100644
--- a/compiler/one-cmds/one-profile
+++ b/compiler/one-cmds/one-profile
@@ -25,9 +25,7 @@ import glob
import itertools
import ntpath
import os
-import subprocess
import sys
-import tempfile
import utils as _utils
diff --git a/compiler/one-cmds/one-quantize b/compiler/one-cmds/one-quantize
index f2eff24bd..9282007d8 100644
--- a/compiler/one-cmds/one-quantize
+++ b/compiler/one-cmds/one-quantize
@@ -21,11 +21,12 @@
import argparse
import os
-import subprocess
import sys
import tempfile
+import json
import utils as _utils
+from utils import Command
# TODO Find better way to suppress trackback on error
sys.tracebacklimit = 0
@@ -67,6 +68,12 @@ def _get_parser():
action='store_true',
help='generate profiling data')
+ # save intermediate file(s)
+ parser.add_argument(
+ '--save_intermediate',
+ action='store_true',
+ help='Save intermediate files to output folder')
+
## arguments for quantization
quantization_group = parser.add_argument_group('arguments for quantization')
@@ -93,13 +100,13 @@ def _get_parser():
'--input_type',
type=str,
help=
- 'data type of inputs of quantized model (supported: uint8, int16, default=quantized_dtype). QUANTIZE Op will be inserted at the beginning of the quantized model if input_type is different from quantized_dtype.'
+ 'data type of inputs of quantized model (supported: uint8, int16, float32, default=quantized_dtype). QUANTIZE Op will be inserted at the beginning of the quantized model if input_type is different from quantized_dtype.'
)
quantization_group.add_argument(
'--output_type',
type=str,
help=
- 'data type of outputs of quantized model (supported: uint8, int16, default=quantized_dtype). QUANTIZE Op will be inserted at the end of the quantized model if output_type is different from quantized_dtype.'
+ 'data type of outputs of quantized model (supported: uint8, int16, float32, default=quantized_dtype). QUANTIZE Op will be inserted at the end of the quantized model if output_type is different from quantized_dtype.'
)
quantization_group.add_argument(
'--min_percentile',
@@ -126,10 +133,50 @@ def _get_parser():
"Force MaxPool Op to have the same input/output quantparams. NOTE: This option can degrade accuracy of some models.)"
)
quantization_group.add_argument(
- '--quant_config',
- type=str,
+ '--quant_config', type=str, help="Path to the quantization configuration file.")
+ quantization_group.add_argument(
+ '--evaluate_result',
+ action='store_true',
+ help=
+ "Evaluate accuracy of quantized model. Run inference for both fp32 model and the quantized model, and compare the inference results."
+ )
+ quantization_group.add_argument(
+ '--test_data', type=str, help="Path to the test data used for evaluation.")
+ quantization_group.add_argument(
+ '--print_mae',
+ action='store_true',
+ help=
+ "Print MAE (Mean Absolute Error) of inference results between quantized model and fp32 model."
+ )
+ quantization_group.add_argument(
+ '--print_mape',
+ action='store_true',
+ help=
+ "Print MAPE (Mean Absolute Percentage Error) of inference results between quantized model and fp32 model."
+ )
+ quantization_group.add_argument(
+ '--print_mpeir',
+ action='store_true',
+ help=
+ "Print MPEIR (Mean Peak Error to Interval Ratio) of inference results between quantized model and fp32 model."
+ )
+ quantization_group.add_argument(
+ '--print_top1_match',
+ action='store_true',
+ help=
+ "Print Top-1 match ratio of inference results between quantized model and fp32 model."
+ )
+ quantization_group.add_argument(
+ '--print_top5_match',
+ action='store_true',
+ help=
+ "Print Top-5 match ratio of inference results between quantized model and fp32 model."
+ )
+ quantization_group.add_argument(
+ '--print_mse',
+ action='store_true',
help=
- "Path to the quantization configuration file."
+ "Print MSE (Mean Squared Error) of inference results between quantized model and fp32 model."
)
# arguments for force_quantparam option
@@ -162,6 +209,14 @@ def _get_parser():
copy_quantparam_group.add_argument(
'--dst_tensor_name', type=str, action='append', help='tensor name (string)')
+ # arguments for fake_quant option
+ fake_quant_group = parser.add_argument_group('arguments for fake_quantize option')
+
+ fake_quant_group.add_argument(
+ '--fake_quantize',
+ action='store_true',
+ help='convert quantized model to fake-quantized fp32 model.')
+
return parser
@@ -171,8 +226,29 @@ def _set_default_values(args):
setattr(args, 'input_model_dtype', 'float32')
if not _utils._is_valid_attr(args, 'quantized_dtype'):
setattr(args, 'quantized_dtype', 'uint8')
+ if _utils._is_valid_attr(args, 'quant_config'):
+ # Get quantized_dtype from qconfig file
+ try:
+ with open(getattr(args, 'quant_config')) as f:
+ qconf = json.load(f)
+ if 'default_quantization_dtype' in qconf:
+ setattr(args, 'quantized_dtype',
+ qconf['default_quantization_dtype'])
+ except json.decoder.JSONDecodeError:
+ print('Failed to decode ' + getattr(args, 'quant_config') +
+ '. Please check it is a json file.')
if not _utils._is_valid_attr(args, 'granularity'):
setattr(args, 'granularity', 'layer')
+ if _utils._is_valid_attr(args, 'quant_config'):
+ # Get granularity from qconfig file
+ try:
+ with open(getattr(args, 'quant_config')) as f:
+ qconf = json.load(f)
+ if 'default_granularity' in qconf:
+ setattr(args, 'granularity', qconf['default_granularity'])
+ except json.decoder.JSONDecodeError:
+ print('Failed to decode ' + getattr(args, 'quant_config') +
+ '. Please check it is a json file.')
if not _utils._is_valid_attr(args, 'mode'):
setattr(args, 'mode', 'percentile')
if not _utils._is_valid_attr(args, 'min_percentile'):
@@ -238,11 +314,18 @@ def _quantize(args):
_copy_qparam(args)
return
+ if _utils._is_valid_attr(args, 'fake_quantize'):
+ # fake-quantize model
+ _fake_quantize(args)
+ return
+
# get file path to log
dir_path = os.path.dirname(os.path.realpath(__file__))
logfile_path = os.path.realpath(args.output_path) + '.log'
with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
+ if _utils._is_valid_attr(args, 'save_intermediate'):
+ tmpdir = os.path.dirname(logfile_path)
# get driver path
circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
record_minmax_path = os.path.join(dir_path, 'record-minmax')
@@ -263,13 +346,19 @@ def _quantize(args):
circle_quantizer_cmd.append(getattr(args, 'quantized_dtype'))
if _utils._is_valid_attr(args, 'granularity'):
circle_quantizer_cmd.append(getattr(args, 'granularity'))
+ if _utils._is_valid_attr(args, 'quant_config'):
+ # NOTE --config conflicts with --config option in onecc, so
+ # we use quant_config for one-quantize
+ circle_quantizer_cmd.append('--config')
+ circle_quantizer_cmd.append(getattr(args, 'quant_config'))
# input and output path
if _utils._is_valid_attr(args, 'input_path'):
circle_quantizer_cmd.append(getattr(args, 'input_path'))
- tmp_output_path_1 = os.path.join(
+ tmp_weights_fake_quant_path = os.path.join(
tmpdir,
- os.path.splitext(os.path.basename(args.input_path))[0]) + '1.circle'
- circle_quantizer_cmd.append(tmp_output_path_1)
+ os.path.splitext(os.path.basename(
+ args.input_path))[0]) + '.weights_fake_quant.circle'
+ circle_quantizer_cmd.append(tmp_weights_fake_quant_path)
# profiling
if _utils._is_valid_attr(args, 'generate_profile_data'):
circle_quantizer_cmd.append('--generate_profile_data')
@@ -279,45 +368,23 @@ def _quantize(args):
# run circle-quantizer
_utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
- ## make a command to record min-max value of each tensor while running the representative dataset
- circle_record_minmax_cmd = [record_minmax_path]
- # verbose
- if _utils._is_valid_attr(args, 'verbose'):
- circle_record_minmax_cmd.append('--verbose')
- # input and output path
- circle_record_minmax_cmd.append('--input_model')
- circle_record_minmax_cmd.append(tmp_output_path_1)
- tmp_output_path_2 = os.path.join(
+ tmp_minmax_recorded_path = os.path.join(
tmpdir,
- os.path.splitext(os.path.basename(args.input_path))[0]) + '2.circle'
- circle_record_minmax_cmd.append('--output_model')
- circle_record_minmax_cmd.append(tmp_output_path_2)
- # input data
- if _utils._is_valid_attr(args, 'input_data'):
- circle_record_minmax_cmd.append('--input_data')
- circle_record_minmax_cmd.append(getattr(args, 'input_data'))
- if _utils._is_valid_attr(args, 'input_data_format'):
- circle_record_minmax_cmd.append('--input_data_format')
- circle_record_minmax_cmd.append(getattr(args, 'input_data_format'))
- # min and max percentile
- if _utils._is_valid_attr(args, 'min_percentile'):
- circle_record_minmax_cmd.append('--min_percentile')
- circle_record_minmax_cmd.append(getattr(args, 'min_percentile'))
- if _utils._is_valid_attr(args, 'max_percentile'):
- circle_record_minmax_cmd.append('--max_percentile')
- circle_record_minmax_cmd.append(getattr(args, 'max_percentile'))
- # mode
- if _utils._is_valid_attr(args, 'mode'):
- circle_record_minmax_cmd.append('--mode')
- circle_record_minmax_cmd.append(getattr(args, 'mode'))
- # profiling
- if _utils._is_valid_attr(args, 'generate_profile_data'):
- circle_record_minmax_cmd.append('--generate_profile_data')
-
- f.write((' '.join(circle_record_minmax_cmd) + '\n').encode())
+ os.path.splitext(os.path.basename(
+ args.input_path))[0]) + '.minmax_recorded.circle'
- # run record-minmax
- _utils._run(circle_record_minmax_cmd, err_prefix="record_minmax", logfile=f)
+ ## make a command to record min-max value of each tensor while running the representative dataset
+ record_minmax_cmd = Command(record_minmax_path, args, f)
+ record_minmax_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \
+ .add_option_with_values('--input_model', [tmp_weights_fake_quant_path]) \
+ .add_option_with_values('--output_model', [tmp_minmax_recorded_path]) \
+ .add_option_with_valid_args('--input_data', ['input_data']) \
+ .add_option_with_valid_args('--input_data_format', ['input_data_format']) \
+ .add_option_with_valid_args('--min_percentile', ['min_percentile']) \
+ .add_option_with_valid_args('--max_percentile', ['max_percentile']) \
+ .add_option_with_valid_args('--mode', ['mode']) \
+ .add_noarg_option_if_valid_arg('--generate_profile_data', 'generate_profile_data') \
+ .run()
## make a second command to quantize the model using the embedded information
circle_quantizer_cmd = [circle_quantizer_path]
@@ -349,7 +416,7 @@ def _quantize(args):
circle_quantizer_cmd.append('--config')
circle_quantizer_cmd.append(getattr(args, 'quant_config'))
# input and output path
- circle_quantizer_cmd.append(tmp_output_path_2)
+ circle_quantizer_cmd.append(tmp_minmax_recorded_path)
if _utils._is_valid_attr(args, 'output_path'):
circle_quantizer_cmd.append(getattr(args, 'output_path'))
# profiling
@@ -361,6 +428,38 @@ def _quantize(args):
# run circle-quantizer
_utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+ # evaluate
+ if _utils._is_valid_attr(args, 'evaluate_result'):
+ circle_eval_diff_path = os.path.join(dir_path, 'circle-eval-diff')
+ quant_model = ""
+ if _utils._is_valid_attr(args, 'output_path'):
+ quant_model = getattr(args, 'output_path')
+ tmp_fake_quant_model = os.path.join(
+ tmpdir,
+ os.path.splitext(os.path.basename(
+ args.input_path))[0]) + '.fake_quant.circle'
+
+ # do fake quantization
+ fake_quantize_cmd = Command(circle_quantizer_path, args, f)
+ fake_quantize_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \
+ .add_option_with_values('--fake_quantize', [quant_model, tmp_fake_quant_model]) \
+ .run()
+
+ # compare fake-quant model and fp32 model
+ circle_eval_diff_cmd = Command(circle_eval_diff_path, args, f)
+ circle_eval_diff_cmd.add_option_with_valid_args('--first_model', ['input_path']) \
+ .add_option_with_values('--second_model', [tmp_fake_quant_model]) \
+ .add_option_with_valid_args('--first_input_data', ['test_data']) \
+ .add_option_with_valid_args('--second_input_data', ['test_data']) \
+ .add_option_with_valid_args('--input_data_format', ['input_data_format']) \
+ .add_noarg_option_if_valid_arg('--print_mae', 'print_mae') \
+ .add_noarg_option_if_valid_arg('--print_mape', 'print_mape') \
+ .add_noarg_option_if_valid_arg('--print_mpeir', 'print_mpeir') \
+ .add_noarg_option_if_valid_arg('--print_top1_match', 'print_top1_match') \
+ .add_noarg_option_if_valid_arg('--print_top5_match', 'print_top5_match') \
+ .add_noarg_option_if_valid_arg('--print_mse', 'print_mse') \
+ .run()
+
def _write_qparam(args):
# get file path to log
@@ -433,6 +532,24 @@ def _copy_qparam(args):
_utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+def _fake_quantize(args):
+ # get file path to log
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ logfile_path = os.path.realpath(args.output_path) + '.log'
+
+ with open(logfile_path, 'wb') as f:
+ # get driver path
+ circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
+ q_model = getattr(args, 'input_path')
+ fq_model = getattr(args, 'output_path')
+
+ # do fake quantization
+ fake_quantize_cmd = Command(circle_quantizer_path, args, f)
+ fake_quantize_cmd.add_noarg_option_if_valid_arg('--verbose', 'verbose') \
+ .add_option_with_values('--fake_quantize', [q_model, fq_model]) \
+ .run()
+
+
def main():
# parse arguments
parser = _get_parser()
diff --git a/compiler/one-cmds/onecc b/compiler/one-cmds/onecc
index 25682ff4b..a5ba636a2 100644
--- a/compiler/one-cmds/onecc
+++ b/compiler/one-cmds/onecc
@@ -25,6 +25,8 @@ import os
import subprocess
import sys
+from onelib.CfgRunner import CfgRunner
+from onelib.WorkflowRunner import WorkflowRunner
import utils as _utils
# TODO Find better way to suppress trackback on error
@@ -42,6 +44,7 @@ subtool_list = {
'backend': {
'codegen': 'Code generation tool',
'profile': 'Profile backend model file',
+ 'infer': 'Infer backend model file'
},
}
@@ -64,12 +67,25 @@ def _check_subtool_exists():
def _get_parser():
- onecc_usage = 'onecc [-h] [-v] [-C CONFIG] [COMMAND <args>]'
+ onecc_usage = 'onecc [-h] [-v] [-C CONFIG] [-W WORKFLOW] [-O OPTIMIZATION] [COMMAND <args>]'
onecc_desc = 'Run ONE driver via several commands or configuration file'
parser = argparse.ArgumentParser(description=onecc_desc, usage=onecc_usage)
_utils._add_default_arg(parser)
+ opt_name_list = _utils._get_optimization_list(get_name=True)
+ opt_name_list = ['-' + s for s in opt_name_list]
+ if not opt_name_list:
+ opt_help_message = '(No available optimization options)'
+ else:
+ opt_help_message = '(Available optimization options: ' + ', '.join(
+ opt_name_list) + ')'
+ opt_help_message = 'optimization name to use ' + opt_help_message
+ parser.add_argument('-O', type=str, metavar='OPTIMIZATION', help=opt_help_message)
+
+ parser.add_argument(
+ '-W', '--workflow', type=str, metavar='WORKFLOW', help='run with workflow file')
+
# just for help message
compile_group = parser.add_argument_group('compile to circle model')
for tool, desc in subtool_list['compile'].items():
@@ -98,45 +114,17 @@ def _parse_arg(parser):
def _verify_arg(parser, args):
"""verify given arguments"""
# check if required arguments is given
- if not _utils._is_valid_attr(args, 'config'):
- parser.error('-C/--config argument is required')
-
-
-def _get_driver_name(driver_name):
- return {
- 'one-optimize': 'one-optimize',
- 'one-quantize': 'one-quantize',
- 'one-pack': 'one-pack',
- 'one-codegen': 'one-codegen',
- 'one-profile': 'one-profile'
- }[driver_name]
-
-
-def _parse_cfg(args):
- config = configparser.ConfigParser()
- config.optionxform = str
- parsed = config.read(os.path.expanduser(getattr(args, 'config')))
- if not parsed:
- raise FileNotFoundError('Not found given configuration file')
- return config
-
-
-def _is_available_driver(config, driver_name):
- return config.has_option('onecc', driver_name) and config.getboolean(
- 'onecc', driver_name)
-
-
-def _verify_cfg(import_driver_list, config):
- if not config.has_section('onecc'):
- raise ImportError('[onecc] section is required in configuration file')
-
- import_driver_cnt = 0
- for d in import_driver_list:
- if _is_available_driver(config, d):
- import_driver_cnt += 1
-
- if import_driver_cnt > 1:
- raise AssertionError('Only one import-* driver can be executed')
+ if not _utils._is_valid_attr(args, 'config') and not _utils._is_valid_attr(
+ args, 'workflow'):
+ parser.error('-C/--config or -W/--workflow argument is required')
+ # check if given optimization option exists
+ opt_name_list = _utils._get_optimization_list(get_name=True)
+ opt_name_list = [_utils._remove_prefix(s, 'O') for s in opt_name_list]
+ if _utils._is_valid_attr(args, 'O'):
+ if ' ' in getattr(args, 'O'):
+ parser.error('Not allowed to have space in the optimization name')
+ if not getattr(args, 'O') in opt_name_list:
+ parser.error('Invalid optimization option')
def main():
@@ -158,35 +146,16 @@ def main():
# verify arguments
_verify_arg(parser, args)
- # parse configuration file
- config = _parse_cfg(args)
-
- # verify configuration file
bin_dir = os.path.dirname(os.path.realpath(__file__))
- import_drivers_dict = _utils._detect_one_import_drivers(bin_dir)
- transform_drivers = [
- 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile'
- ]
- _verify_cfg(import_drivers_dict, config)
-
- # get sections to run
- section_to_run = []
- for d in list(import_drivers_dict) + transform_drivers:
- if _is_available_driver(config, d):
- section_to_run.append(d)
-
- # run
- dir_path = os.path.dirname(os.path.realpath(__file__))
- for section in section_to_run:
- if section in import_drivers_dict:
- # we already has driver name in dict
- driver_name = import_drivers_dict[section]
- else:
- driver_name = _get_driver_name(section)
- options = ['--config', getattr(args, 'config'), '--section', section]
- if _utils._is_valid_attr(args, 'verbose'):
- options.append('--verbose')
- _call_driver(driver_name, options)
+ if _utils._is_valid_attr(args, 'config'):
+ runner = CfgRunner(args.config)
+ runner.detect_import_drivers(bin_dir)
+ if _utils._is_valid_attr(args, 'O'):
+ runner.add_opt(getattr(args, 'O'))
+ runner.run(bin_dir)
+ elif _utils._is_valid_attr(args, 'workflow'):
+ runner = WorkflowRunner(args.workflow)
+ runner.run(bin_dir)
if __name__ == '__main__':
diff --git a/compiler/one-cmds/onecc.template.cfg b/compiler/one-cmds/onecc.template.cfg
index a23d1cea9..6f6a4e266 100644
--- a/compiler/one-cmds/onecc.template.cfg
+++ b/compiler/one-cmds/onecc.template.cfg
@@ -1,28 +1,144 @@
+; To activate a step (or task),
+; set True for the step in [onecc] section and fill options in the corresponding section
[onecc]
-one-import-tf=True
+; neural network model to circle
+one-import-tf=False
one-import-tflite=False
one-import-bcq=False
one-import-onnx=False
-one-optimize=True
+; circle to circle with optimization
+one-optimize=False
+; circle to circle with quantization
one-quantize=False
-one-pack=True
+; partition circle
+one-partition=False
+; package circle and metadata into nnpackage
+one-pack=False
+; generate code for backend
one-codegen=False
+; profile
one-profile=False
+; infer
+one-infer=False
[one-import-tf]
-input_path=/path/to/inception_v3.pb
-output_path=inception_v3.circle
-input_arrays=input
-input_shapes=1,299,299,3
-output_arrays=InceptionV3/Predictions/Reshape_1
-converter_version=v1
+# mandatory
+; pb file
+input_path=
+; circle file
+output_path=
+# optional
+; v1 or v2
+converter_version=v2
+; graph_def(default), saved_model or keras_model
model_format=graph_def
+# optional but mandatory for model_format=graph_def
+; input tensor names of the input arrays, comma-separated
+input_arrays=
+; output tensor names of the input arrays, comma-separated
+output_arrays=
+; input shapes corresponding to --input_arrays, colon-separated.(ex:1,4,4,3:1,20,20,3)
+input_shapes=
+
+[one-import-tflite]
+# mandatory
+; tflite file
+input_path=
+; circle file
+output_path=
+
+[one-import-bcq]
+# mandatory
+; bcq file
+input_path=
+; circle file
+output_path=
+# optional
+; v1 or v2
+converter_version=v2
+; graph_def(default), saved_model or keras_model
+model_format=graph_def
+# optional but mandatory for model_format=graph_def
+; input tensor names of the input arrays, comma-separated
+input_arrays=
+; output tensor names of the input arrays, comma-separated
+output_arrays=
+; input shapes corresponding to --input_arrays, colon-separated.(ex:1,4,4,3:1,20,20,3)
+input_shapes=
+
+[one-import-onnx]
+# mandatory
+; onnx file
+input_path=
+; circle file
+output_path=
+# optional
+; True or False
+unroll_rnn=
+; True or False
+unroll_lstm=
[one-optimize]
-input_path=inception_v3.circle
-output_path=inception_v3.opt.circle
-generate_profile_data=False
+# mandatory
+; circle file
+input_path=
+; circle file
+output_path=
+# //TODO: Add available options
+
+[one-quantize]
+# mandatory
+; circle file
+input_path=
+; circle file
+output_path=
+# optional arguments for quantization
+; input data file (if not given, random data will be used for calibration)
+input_data=
+; h5/hdf5(default), list/filelist, or dir/directory
+input_data_format=
+; dtype of quantized model (uint8(default), int16)
+quantized_dtype=
+; granularity of quantization (layer(default), channel)
+granularity=
+; dtype of model's input (uint8, int16, float32). Same with quantized_dtype by default.
+input_type=
+; dtype of model's output (uint8, int16, float32). Same with quantized_dtype by default.
+output_type=
+
+[one-partition]
+# mandatory
+; partition file which provides backend to assign
+part_file=
+; circle file
+input_file=
+# //TODO: Add available options
[one-pack]
-input_path=inception_v3.opt.circle
-output_path=inception_v3_pack
+# mandatory
+; input path
+input_path=
+; output path
+output_path=
+# //TODO: Add available options
+
+[one-codegen]
+# mandatory
+; backend name
+backend=
+; commands for each backend
+command=
+
+[one-profile]
+# mandatory
+; backend name
+backend=
+# //TODO: Add available options
+
+[one-infer]
+# mandatory (mutually exclusive)
+; backend name
+backend=
+; driver name
+driver=
+# //TODO: Add available options
diff --git a/compiler/one-cmds/onelib/CfgRunner.py b/compiler/one-cmds/onelib/CfgRunner.py
new file mode 100644
index 000000000..c66e5b4ba
--- /dev/null
+++ b/compiler/one-cmds/onelib/CfgRunner.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import configparser
+import os
+import warnings
+
+import utils as oneutils
+
+
+def _simple_warning(message, category, filename, lineno, file=None, line=None):
+ return f'{category.__name__}: {message}\n'
+
+
+class CfgRunner:
+ driver_sequence = [
+ 'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile',
+ 'one-partition', 'one-infer'
+ ]
+
+ def __init__(self, path):
+ self.path = path
+ self.optparser = None
+ self.cfgparser = configparser.ConfigParser()
+ # make option names case sensitive
+ self.cfgparser.optionxform = str
+ parsed = self.cfgparser.read(os.path.expanduser(path))
+ if not parsed:
+ raise FileNotFoundError('Not found given configuration file')
+
+ self._verify_cfg(self.cfgparser)
+ # default import drivers
+ self.import_drivers = [
+ 'one-import-bcq', 'one-import-onnx', 'one-import-tf', 'one-import-tflite'
+ ]
+
+ def _verify_cfg(self, cfgparser):
+ if not cfgparser.has_section('onecc'):
+ if cfgparser.has_section('one-build'):
+ warnings.formatwarning = _simple_warning
+ warnings.warn(
+ "[one-build] section will be deprecated. Please use [onecc] section.")
+ else:
+ raise ImportError('[onecc] section is required in configuration file')
+
+ def _is_available(self, driver):
+ # if there is no [onecc] section, fall back to the [one-build] section for backward compatibility
+ return (self.cfgparser.has_option('onecc', driver) and self.cfgparser.getboolean(
+ 'onecc', driver)) or (self.cfgparser.has_option('one-build', driver)
+ and self.cfgparser.getboolean('one-build', driver))
+
+ def add_opt(self, opt):
+ self.optparser = configparser.ConfigParser()
+ # make option names case sensitive
+ self.optparser.optionxform = str
+ opt_book = dict(
+ zip(oneutils._get_optimization_list(get_name=True),
+ oneutils._get_optimization_list()))
+ parsed = self.optparser.read(opt_book['O' + opt])
+ if not parsed:
+ raise FileNotFoundError('Not found given optimization configuration file')
+ if len(self.optparser.sections()) != 1 or self.optparser.sections(
+ )[0] != 'one-optimize':
+ raise AssertionError(
+ 'Optimization configuration file only allowed to have a \'one-optimize\' section'
+ )
+ self.opt = opt
+
+ def detect_import_drivers(self, dir):
+ self.import_drivers = list(oneutils._detect_one_import_drivers(dir).keys())
+
+ def run(self, working_dir, verbose=False):
+ section_to_run = []
+ for d in self.import_drivers + self.driver_sequence:
+ if self._is_available(d):
+ section_to_run.append(d)
+
+ for section in section_to_run:
+ options = ['--config', self.path, '--section', section]
+ if section == 'one-optimize' and self.optparser:
+ options += ['-O', self.opt]
+ if verbose:
+ options.append('--verbose')
+ driver_path = os.path.join(working_dir, section)
+ cmd = [driver_path] + options
+ oneutils._run(cmd)
diff --git a/compiler/one-cmds/onelib/OptionBuilder.py b/compiler/one-cmds/onelib/OptionBuilder.py
new file mode 100644
index 000000000..6a75783ad
--- /dev/null
+++ b/compiler/one-cmds/onelib/OptionBuilder.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from onelib.constant import CONSTANT
+
+
+class OptionBuilder:
+ def __init__(self, one_cmd_type):
+ self.type = one_cmd_type
+
+ def _build_default(self, commands):
+ options = []
+ for k, v in commands.items():
+ options.extend(['--' + k, v])
+ return options
+
+ def _build_with_unknown_command(self, commands):
+ COMMAND_K = 'command'
+ options = []
+ for k, v in commands.items():
+ if k == COMMAND_K:
+ continue
+ options.extend(['--' + k, v])
+ options.extend(['--'])
+ options.extend(commands[COMMAND_K].split())
+ return options
+
+ def _build_import(self, commands):
+ options = []
+ arg_0 = ['save_intermediate']
+ for k, v in commands.items():
+ if k in arg_0 and v == "True":
+ options.extend(['--' + k])
+ continue
+ options.extend(['--' + k, v])
+ return options
+
+ def _build_optimize(self, commands):
+ options = []
+ arg_0 = ['generate_profile_data']
+ arg_1 = ['input_path', 'output_path', 'change_outputs']
+ for k, v in commands.items():
+ if k in arg_1:
+ options.extend(['--' + k, v])
+ continue
+ if k in arg_0 and v == 'True':
+ options.extend(['--' + k])
+ continue
+ for opt in CONSTANT.OPTIMIZATION_OPTS:
+ if k == opt[0] and v == "True":
+ options.extend(['--' + k])
+ break
+ return options
+
+ def _build_quantize(self, commands):
+ options = []
+ arg_0 = [
+ 'generate_profile_data', 'save_intermediate', 'TF-style_maxpool',
+ 'evaluate_result', 'print_mae', 'print_mape', 'print_mpeir',
+ 'print_top1_match', 'print_top5_match', 'force_quantparam', 'copy_quantparam'
+ ]
+ for k, v in commands.items():
+ if k in arg_0 and v == "True":
+ options.extend(['--' + k])
+ continue
+ options.extend(['--' + k, v])
+ return options
+
+ def build(self, commands):
+ cmd_book = dict.fromkeys(
+ ['one-import-bcq', 'one-import-tflite', 'one-pack', 'one-partition'],
+ self._build_default)
+ cmd_book['one-codegen'] = self._build_with_unknown_command
+ cmd_book['one-import-onnx'] = self._build_import
+ cmd_book['one-import-pytorch'] = self._build_import
+ cmd_book['one-import-tf'] = self._build_import
+ cmd_book['one-infer'] = self._build_with_unknown_command
+ cmd_book['one-optimize'] = self._build_optimize
+ cmd_book['one-profile'] = self._build_with_unknown_command
+ cmd_book['one-quantize'] = self._build_quantize
+
+ return cmd_book[self.type](commands)
diff --git a/compiler/one-cmds/onelib/TopologicalSortHelper.py b/compiler/one-cmds/onelib/TopologicalSortHelper.py
new file mode 100644
index 000000000..d05adea8d
--- /dev/null
+++ b/compiler/one-cmds/onelib/TopologicalSortHelper.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from collections import defaultdict
+
+
+class TopologicalSortHelper:
+ def __init__(self, vertices):
+ self.graph = defaultdict(list)
+ self.vertices = vertices
+
+ def add_edge(self, u, v):
+ self.graph[u].append(v)
+
+ def sort_util(self, v, visited, stack):
+ visited[v] = True
+
+ for i in self.graph[v]:
+ if visited[i] == False:
+ self.sort_util(i, visited, stack)
+
+ stack.insert(0, v)
+
+ def sort(self):
+ visited = dict.fromkeys(self.vertices, False)
+ stack = []
+
+ for v in self.vertices:
+ if visited[v] == False:
+ self.sort_util(v, visited, stack)
+
+ return stack
diff --git a/compiler/one-cmds/onelib/WorkflowRunner.py b/compiler/one-cmds/onelib/WorkflowRunner.py
new file mode 100644
index 000000000..0482dd9da
--- /dev/null
+++ b/compiler/one-cmds/onelib/WorkflowRunner.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import os
+
+from onelib.OptionBuilder import OptionBuilder
+from onelib.TopologicalSortHelper import TopologicalSortHelper
+from onelib.CfgRunner import CfgRunner
+import utils as oneutils
+
+
+class WorkflowRunner:
+ WORKFLOWS_K = 'workflows'
+ DEPENDENCIES_K = 'run-after'
+ CFG_REFERENCE_K = 'cfg-reference'
+ WORKFLOW_STEPS_K = 'steps'
+ ONE_CMD_TOOL_K = 'one-cmd'
+ COMMANDS_K = 'commands'
+
+ def __init__(self, path):
+ try:
+ with open(path) as f:
+ self.json_contents = json.load(f)
+ except FileNotFoundError:
+ raise FileNotFoundError("Not found given workflow file")
+ except json.decoder.JSONDecodeError:
+ raise ImportError("Invalid workflow file")
+
+ self._verify_workflow(self.json_contents)
+
+ workflows = self.json_contents[self.WORKFLOWS_K]
+ self.adj = dict.fromkeys(workflows, [])
+ # decide the order according to the dependencies of each workflow.
+ helper = TopologicalSortHelper(workflows)
+ for workflow_k in workflows:
+ workflow = self.json_contents[workflow_k]
+ if self.DEPENDENCIES_K in workflow:
+ for previous_workflow in workflow[self.DEPENDENCIES_K]:
+ helper.add_edge(previous_workflow, workflow_k)
+ self.adj[previous_workflow].append(workflow_k)
+ self.workflow_sequence = helper.sort()
+
+ self._check_cycle()
+
+ def _check_cycle(self):
+ pos = dict()
+ index = 0
+ workflow_num = len(self.workflow_sequence)
+ # number the order
+ for seq_idx in range(workflow_num):
+ pos[self.workflow_sequence[seq_idx]] = index
+ index += 1
+
+ for seq_idx in range(workflow_num):
+ first_wf = self.workflow_sequence[seq_idx]
+ for adj_wf in self.adj[first_wf]:
+ first_pos = 0 if first_wf not in pos else pos[first_wf]
+ second_pos = 0 if adj_wf not in pos else pos[adj_wf]
+ if (first_pos > second_pos):
+ raise RuntimeError("Workflows should not have a cycle")
+
+ def _verify_workflow(self, json_contents):
+ # workflow file should have WORKFLOWS_K
+ if not self.WORKFLOWS_K in json_contents:
+ raise ValueError("Not found \"" + self.WORKFLOWS_K +
+ "\" key in workflow file")
+
+ workflows = json_contents[self.WORKFLOWS_K]
+ # workflow file should have keys listed in WORKFLOWS_K
+ for workflow_k in workflows:
+ if not workflow_k in json_contents:
+ raise ValueError("Not found " + workflow_k + " key listed in \"" +
+ self.WORKFLOWS_K + "\"")
+
+ # each workflow should have either WORKFLOW_STEPS_K or CFG_REFERENCE_K
+ for workflow_k in workflows:
+ if not self.WORKFLOW_STEPS_K in json_contents[workflow_k] and not self.CFG_REFERENCE_K in json_contents[workflow_k]:
+ raise ValueError("Each workflow should have either \"" +
+ self.WORKFLOW_STEPS_K + "\" or \"" +
+ self.CFG_REFERENCE_K + "\"")
+ for workflow_k in workflows:
+ if self.WORKFLOW_STEPS_K in json_contents[workflow_k] and self.CFG_REFERENCE_K in json_contents[workflow_k]:
+ raise ValueError("\"" + self.WORKFLOW_STEPS_K + "\" and \"" +
+ self.CFG_REFERENCE_K + "\" are exclusive key")
+
+ # each step should have ONE_CMD_TOOL_K and COMMANDS_K
+ for workflow_k in workflows:
+ workflow = json_contents[workflow_k]
+ if self.WORKFLOW_STEPS_K in workflow:
+ step_keys = workflow[self.WORKFLOW_STEPS_K]
+ for step_k in step_keys:
+ step = workflow[step_k]
+ if not self.ONE_CMD_TOOL_K in step or not self.COMMANDS_K in step:
+ raise ValueError("Each step should have \"" +
+ self.ONE_CMD_TOOL_K + "\"" + " and \"" +
+ self.COMMANDS_K + "\"")
+
+ def run(self, working_dir, verbose=False):
+ # run workflows in sequence
+ for workflow_k in self.workflow_sequence:
+ workflow = self.json_contents[workflow_k]
+ if self.WORKFLOW_STEPS_K in workflow:
+ steps = workflow[self.WORKFLOW_STEPS_K]
+ for step_k in steps:
+ step = workflow[step_k]
+ commands = step[self.COMMANDS_K]
+ driver_name = step[self.ONE_CMD_TOOL_K]
+ option_builder = OptionBuilder(driver_name)
+ options = option_builder.build(commands)
+ # get the absolute path of the caller
+ driver_path = os.path.join(working_dir, driver_name)
+ cmd = [driver_path] + options
+ oneutils._run(cmd)
+ elif self.CFG_REFERENCE_K in workflow:
+ cfg_path = workflow[self.CFG_REFERENCE_K]['path']
+ runner = CfgRunner(cfg_path)
+ runner.run(working_dir, verbose)
diff --git a/compiler/one-cmds/onelib/constant.py b/compiler/one-cmds/onelib/constant.py
index 7ddd7382d..7dd79b65d 100644
--- a/compiler/one-cmds/onelib/constant.py
+++ b/compiler/one-cmds/onelib/constant.py
@@ -14,11 +14,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+
class CONSTANT:
__slots__ = () # This prevents access via __dict__.
OPTIMIZATION_OPTS = (
# (OPTION_NAME, HELP_MESSAGE)
- ('O1', 'enable O1 optimization pass'),
('convert_nchw_to_nhwc',
'Experimental: This will convert NCHW operators to NHWC under the assumption that input model is NCHW.'
),
@@ -29,6 +29,7 @@ class CONSTANT:
'convert the output shape of the model (argument for convert_nchw_to_nhwc)'),
('fold_add_v2', 'fold AddV2 op with constant inputs'),
('fold_cast', 'fold Cast op with constant input'),
+ ('fold_densify', 'fold Densify op with sparse constant input'),
('fold_dequantize', 'fold Dequantize op'),
('fold_dwconv', 'fold Depthwise Convolution op with constant inputs'),
('fold_gather', 'fold Gather op'),
@@ -62,12 +63,16 @@ class CONSTANT:
('remove_unnecessary_slice', 'remove unnecessary slice ops'),
('remove_unnecessary_strided_slice', 'remove unnecessary strided slice ops'),
('remove_unnecessary_split', 'remove unnecessary split ops'),
+ ('replace_non_const_fc_with_batch_matmul',
+ 'replace FullyConnected op with non-const weights to BatchMatMul op'),
+ ('replace_sub_with_add', 'replace Sub op with Add op'),
('resolve_customop_add', 'convert Custom(Add) op to Add op'),
('resolve_customop_batchmatmul',
'convert Custom(BatchMatmul) op to BatchMatmul op'),
('resolve_customop_matmul', 'convert Custom(Matmul) op to Matmul op'),
('resolve_customop_max_pool_with_argmax',
'convert Custom(MaxPoolWithArgmax) to net of builtin operators'),
+ ('resolve_customop_splitv', 'convert Custom(SplitV) op to SplitV op'),
('shuffle_weight_to_16x1float32',
'convert weight format of FullyConnected op to SHUFFLED16x1FLOAT32.'
' Note that it only converts weights whose row is a multiple of 16'),
diff --git a/compiler/one-cmds/onelib/make_cmd.py b/compiler/one-cmds/onelib/make_cmd.py
index d8380f28d..0015e8319 100644
--- a/compiler/one-cmds/onelib/make_cmd.py
+++ b/compiler/one-cmds/onelib/make_cmd.py
@@ -19,6 +19,7 @@ import sys
import onelib.constant as _constant
+
def _is_valid_attr(args, attr):
return hasattr(args, attr) and getattr(args, attr)
@@ -64,6 +65,10 @@ def make_tf2tfliteV2_cmd(args, driver_path, input_path, output_path):
cmd.append('--output_arrays')
cmd.append(getattr(args, 'output_arrays'))
+ # experimental options
+ if _is_valid_attr(args, 'experimental_disable_batchmatmul_unfold'):
+ cmd.append('--experimental_disable_batchmatmul_unfold')
+
return cmd
diff --git a/compiler/one-cmds/onnx_legalizer.py b/compiler/one-cmds/onnx_legalizer.py
index 26c2b75b9..0141514b6 100755
--- a/compiler/one-cmds/onnx_legalizer.py
+++ b/compiler/one-cmds/onnx_legalizer.py
@@ -341,7 +341,8 @@ def _dtype_to_np(dtype):
raise NotImplementedError('unsupported data type')
-def _generate_one_direction_RNN(transformer, X, W, R, B, initial_h, clip, activation_name):
+def _generate_one_direction_RNN(transformer, X, W, R, B, initial_h, clip,
+ activation_name):
"""Generate subgraph of one direction of unrolled RNN layer
Args:
@@ -395,7 +396,7 @@ def _generate_one_direction_RNN(transformer, X, W, R, B, initial_h, clip, activa
def _transform_unidirectional_RNN(transformer, original_node, x, tensor_infos, activation,
- clip, direction, hidden_size, layout):
+ clip, direction, hidden_size, layout):
"""Generate Simple (forward or reverse) unrolled RNN
Args:
@@ -432,7 +433,7 @@ def _transform_unidirectional_RNN(transformer, original_node, x, tensor_infos, a
else:
initial_h = None
state_tensors = _generate_one_direction_RNN(transformer, x, w, r, b, initial_h, clip,
- activation)
+ activation)
y_direction_dim = layout + 1
y_h_direction_dim = layout
state_layout_tensors = []
@@ -447,12 +448,11 @@ def _transform_unidirectional_RNN(transformer, original_node, x, tensor_infos, a
transformer.make_node(
'Unsqueeze', [state_tensors[-1]], [Y_h], axes=[y_h_direction_dim])
Y = outputs[0]
- transformer.make_node(
- 'Concat', state_layout_tensors, [Y], axis=seq_length_dim)
+ transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim)
def _transform_bidirectional_RNN(transformer, original_node, x, tensor_infos, activations,
- clip, hidden_size, layout):
+ clip, hidden_size, layout):
"""Generate Bidirectional unrolled RNN
Args:
@@ -503,10 +503,10 @@ def _transform_bidirectional_RNN(transformer, original_node, x, tensor_infos, ac
initial_h[d] = transformer.make_squeeze(initial_h[d], axes=[direction_dim])
state_f_tensors = _generate_one_direction_RNN(transformer, x, w[0], r[0], b[0],
- initial_h[0], clip, activations[0])
+ initial_h[0], clip, activations[0])
x.reverse()
state_b_tensors = _generate_one_direction_RNN(transformer, x, w[1], r[1], b[1],
- initial_h[1], clip, activations[1])
+ initial_h[1], clip, activations[1])
state_b_tensors.reverse()
y_direction_dim = layout + 1
@@ -538,8 +538,7 @@ def _transform_bidirectional_RNN(transformer, original_node, x, tensor_infos, ac
axis=y_h_direction_dim)
Y = outputs[0]
- transformer.make_node(
- 'Concat', state_layout_tensors, [Y], axis=seq_length_dim)
+ transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim)
def _legalize_RNN(transformer, tensor_infos, node):
@@ -600,10 +599,10 @@ def _legalize_RNN(transformer, tensor_infos, node):
if direction in ['forward', 'reverse']:
_transform_unidirectional_RNN(transformer, node, x, tensor_infos, activations[0],
- clip, direction, hidden_size, layout)
+ clip, direction, hidden_size, layout)
elif direction == 'bidirectional':
- _transform_bidirectional_RNN(transformer, node, x, tensor_infos, activations, clip,
- hidden_size, layout)
+ _transform_bidirectional_RNN(transformer, node, x, tensor_infos, activations,
+ clip, hidden_size, layout)
else:
raise RuntimeError('Unknown RNN type')
@@ -611,7 +610,7 @@ def _legalize_RNN(transformer, tensor_infos, node):
def _generate_one_direction_LSTM(transformer, X, W, R, B, initial_h, initial_c, P, clip,
- act, dtype, hidden_size, batch_size):
+ act, dtype, hidden_size, batch_size):
"""Generate subgraph for one direction of unrolled LSTM layer
Args:
@@ -754,7 +753,7 @@ def _generate_one_direction_LSTM(transformer, X, W, R, B, initial_h, initial_c,
def _transform_unidirectional_LSTM(transformer, original_node, x, tensor_infos,
- activations, clip, direction, hidden_size, layout):
+ activations, clip, direction, hidden_size, layout):
"""Generate Simple (forward or reverse) unrolled LSTM
Args:
@@ -818,17 +817,15 @@ def _transform_unidirectional_LSTM(transformer, original_node, x, tensor_infos,
transformer.make_node(
'Unsqueeze', [state_h_tensors[-1]], [Y_h], axes=[y_h_direction_dim])
Y_c = outputs[2]
- transformer.make_node(
- 'Unsqueeze', [state_c_tensor], [Y_c], axes=[y_h_direction_dim])
+ transformer.make_node('Unsqueeze', [state_c_tensor], [Y_c], axes=[y_h_direction_dim])
if direction == 'reverse':
state_layout_tensors.reverse()
Y = outputs[0]
- transformer.make_node(
- 'Concat', state_layout_tensors, [Y], axis=seq_length_dim)
+ transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim)
-def _transform_bidirectional_LSTM(transformer, original_node, x, tensor_infos, activations,
- clip, hidden_size, layout):
+def _transform_bidirectional_LSTM(transformer, original_node, x, tensor_infos,
+ activations, clip, hidden_size, layout):
"""Generate Bidirectional unrolled LSTM
Args:
@@ -929,12 +926,10 @@ def _transform_bidirectional_LSTM(transformer, original_node, x, tensor_infos, a
Y_f_c = transformer.make_unsqueeze(state_f_c_tensor, axes=[y_c_direction_dim])
Y_b_c = transformer.make_unsqueeze(state_b_c_tensor, axes=[y_c_direction_dim])
Y_c = outputs[2]
- transformer.make_node(
- 'Concat', [Y_f_c, Y_b_c], [Y_c], axis=y_c_direction_dim)
+ transformer.make_node('Concat', [Y_f_c, Y_b_c], [Y_c], axis=y_c_direction_dim)
Y = outputs[0]
- transformer.make_node(
- 'Concat', state_layout_tensors, [Y], axis=seq_length_dim)
+ transformer.make_node('Concat', state_layout_tensors, [Y], axis=seq_length_dim)
def _legalize_LSTM(transformer, tensor_infos, node):
@@ -1001,10 +996,10 @@ def _legalize_LSTM(transformer, tensor_infos, node):
if direction in ['forward', 'reverse']:
_transform_unidirectional_LSTM(transformer, node, x, tensor_infos, activations,
- clip, direction, hidden_size, layout)
+ clip, direction, hidden_size, layout)
elif direction == 'bidirectional':
_transform_bidirectional_LSTM(transformer, node, x, tensor_infos, activations,
- clip, hidden_size, layout)
+ clip, hidden_size, layout)
else:
raise RuntimeError('Unknown LSTM type')
@@ -1052,10 +1047,12 @@ def legalize(model, options):
if __name__ == '__main__':
if len(sys.argv) < 3:
- print('usage: ./legalize_onnx.py <path to input model> <path to output model>\n'
- '\n'
- ' In stand-alone utility mode this tool provides basic funtionality\n'
- ' If you want to have more control over applied transformations, use this legalizer as a library')
+ print(
+ 'usage: ./legalize_onnx.py <path to input model> <path to output model>\n'
+ '\n'
+        '    In stand-alone utility mode this tool provides basic functionality\n'
+ ' If you want to have more control over applied transformations, use this legalizer as a library'
+ )
exit(1)
options = LegalizeOptions()
options.unroll_lstm = True
diff --git a/compiler/one-cmds/requires.cmake b/compiler/one-cmds/requires.cmake
index b1aabdb97..c27920980 100644
--- a/compiler/one-cmds/requires.cmake
+++ b/compiler/one-cmds/requires.cmake
@@ -1,6 +1,7 @@
require("tf2tfliteV2")
require("tflite2circle")
require("circle2circle")
+require("circle-eval-diff")
require("circle-quantizer")
require("record-minmax")
require("vconone")
diff --git a/compiler/one-cmds/tests/CMakeLists.txt b/compiler/one-cmds/tests/CMakeLists.txt
index caea756c2..17f55ec96 100644
--- a/compiler/one-cmds/tests/CMakeLists.txt
+++ b/compiler/one-cmds/tests/CMakeLists.txt
@@ -4,6 +4,8 @@
file(GLOB TESTITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.test")
file(GLOB CONFIGITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.cfg")
file(GLOB QCONFIGITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.qconf.json")
+file(GLOB PYSCRIPTS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.py")
+file(GLOB WORKFLOWITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.workflow.json")
# Create a script to run the tests at installation folder
set(DRIVER_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/runtestall.sh")
@@ -45,6 +47,16 @@ foreach(QCONFIGITEM IN ITEMS ${QCONFIGITEMS})
install(FILES ${QCONFIGITEM} DESTINATION test)
endforeach(QCONFIGITEM)
+foreach(PYSCRIPT IN ITEMS ${PYSCRIPTS})
+ get_filename_component(ITEM_PREFIX ${PYSCRIPT} NAME_WE)
+ install(FILES ${PYSCRIPT} DESTINATION test)
+endforeach(PYSCRIPT)
+
+foreach(WORKFLOWITEM IN ITEMS ${WORKFLOWITEMS})
+ get_filename_component(ITEM_PREFIX ${WORKFLOWITEM} NAME_WE)
+ install(FILES ${WORKFLOWITEM} DESTINATION test)
+endforeach(WORKFLOWITEM)
+
file(APPEND "${DRIVER_SCRIPT}" "popd > /dev/null\n\n")
file(APPEND "${DRIVER_SCRIPT}"
diff --git a/compiler/one-cmds/tests/OONECC_024.cfg b/compiler/one-cmds/tests/OONECC_024.cfg
new file mode 100644
index 000000000..a39aae071
--- /dev/null
+++ b/compiler/one-cmds/tests/OONECC_024.cfg
@@ -0,0 +1,2 @@
+[one-optimize]
+make_batchnorm_gamma_positive=True
diff --git a/compiler/one-cmds/tests/one-build_008.cfg b/compiler/one-cmds/tests/one-build_008.cfg
index 615047c86..8c777f64f 100644
--- a/compiler/one-cmds/tests/one-build_008.cfg
+++ b/compiler/one-cmds/tests/one-build_008.cfg
@@ -15,7 +15,6 @@ output_path=test_onnx_model.circle
[one-optimize]
input_path=test_onnx_model.circle
output_path=test_onnx_model.opt.circle
-all=True
remove_redundant_transpose=True
[one-codegen]
diff --git a/compiler/one-cmds/tests/one-build_009.cfg b/compiler/one-cmds/tests/one-build_009.cfg
index 66bca250d..b5a35dd97 100644
--- a/compiler/one-cmds/tests/one-build_009.cfg
+++ b/compiler/one-cmds/tests/one-build_009.cfg
@@ -15,7 +15,6 @@ output_path=onnx_conv2d_conv2d.circle
[one-optimize]
input_path=onnx_conv2d_conv2d.circle
output_path=onnx_conv2d_conv2d.opt.circle
-all=True
remove_redundant_transpose=True
convert_nchw_to_nhwc=True
diff --git a/compiler/one-cmds/tests/one-import-onnx_002.test b/compiler/one-cmds/tests/one-import-onnx_002.test
new file mode 100644
index 000000000..a6a38eee5
--- /dev/null
+++ b/compiler/one-cmds/tests/one-import-onnx_002.test
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# test for experimental_disable_batchmatmul_unfold option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./reshape_matmul.onnx"
+outputfile="./reshape_matmul.circle"
+
+rm -rf ${outputfile}
+rm -rf ${outputfile}.log
+
+# run test without the option: BatchMatMul should be unfolded into FULLY_CONNECTED
+one-import-onnx \
+--input_path ${inputfile} \
+--output_path ${outputfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+circle-operator --code reshape_matmul.circle > ${outputfile}.log 2>&1
+
+if ! grep -q "FULLY_CONNECTED" "${outputfile}.log"; then
+ trap_err_onexit
+fi
+
+rm -rf ${outputfile}
+rm -rf ${outputfile}.log
+
+# run test with the option: BATCH_MATMUL should be kept (not unfolded)
+one-import-onnx \
+--experimental_disable_batchmatmul_unfold \
+--input_path ${inputfile} \
+--output_path ${outputfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+circle-operator --code reshape_matmul.circle > ${outputfile}.log 2>&1
+
+if ! grep -q "BATCH_MATMUL" "${outputfile}.log"; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
+exit 0
diff --git a/compiler/one-cmds/tests/one-infer-test-post-process.py b/compiler/one-cmds/tests/one-infer-test-post-process.py
new file mode 100644
index 000000000..0f0e0d701
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer-test-post-process.py
@@ -0,0 +1,16 @@
+# This script gets one argument and print it
+
+import sys
+from pathlib import Path
+
+
+def main():
+ if len(sys.argv) < 2:
+ filepath = Path(sys.argv[0])
+ sys.exit("Usage: " + filepath.name + " [Word to print]")
+ word = sys.argv[1]
+ print(word)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/compiler/one-cmds/tests/one-infer_001.test b/compiler/one-cmds/tests/one-infer_001.test
new file mode 100644
index 000000000..e7b569522
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_001.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/help-infer
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+# copy help-infer to bin folder
+cp help-infer ../bin/help-infer
+
+# run test
+one-infer -b help -- -h > ${filename}.log
+
+rm -rf ../bin/help-infer
+
+if grep -q "HELP MESSAGE!!" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-infer_002.test b/compiler/one-cmds/tests/one-infer_002.test
new file mode 100644
index 000000000..22070de19
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_002.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-infer
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="sample.tvn"
+
+if [[ ! -s "${inputfile}" ]]; then
+ touch ${inputfile}
+fi
+
+# copy dummy-infer to bin folder
+cp dummy-infer ../bin/dummy-infer
+
+# run test
+one-infer -d dummy-infer -- ${inputfile} > ${filename}.log
+
+rm -rf ../bin/dummy-infer
+
+if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-infer_003.test b/compiler/one-cmds/tests/one-infer_003.test
new file mode 100644
index 000000000..e2aa459a1
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_003.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-infer
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="sample.tvn"
+
+if [[ ! -s "${inputfile}" ]]; then
+ touch ${inputfile}
+fi
+
+# copy dummy-infer to bin folder
+cp dummy-infer ../bin/dummy-infer
+
+# run test
+one-infer -b dummy -- ${inputfile} > ${filename}.log
+
+rm -rf ../bin/dummy-infer
+
+if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-infer_004.test b/compiler/one-cmds/tests/one-infer_004.test
new file mode 100644
index 000000000..a4cb76c55
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_004.test
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# print one-infer's help message
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+# run test
+one-infer -h > ${filename}.log
+
+if grep -q "command line tool to infer model" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-infer_005.cfg b/compiler/one-cmds/tests/one-infer_005.cfg
new file mode 100644
index 000000000..aca687801
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_005.cfg
@@ -0,0 +1,3 @@
+[one-infer]
+backend=dummy
+command=sample.tvn
diff --git a/compiler/one-cmds/tests/one-infer_005.test b/compiler/one-cmds/tests/one-infer_005.test
new file mode 100644
index 000000000..a44dd0e25
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_005.test
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-infer with configuration input
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-infer
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="one-infer_005.cfg"
+inputfile="sample.tvn"
+
+if [[ ! -s "${inputfile}" ]]; then
+ touch ${inputfile}
+fi
+
+# copy dummy-infer to bin folder
+cp dummy-infer ../bin/dummy-infer
+
+# run test
+one-infer -C ${configfile} > ${filename}.log
+
+rm -rf ../bin/dummy-infer
+
+if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-infer_006.test b/compiler/one-cmds/tests/one-infer_006.test
new file mode 100644
index 000000000..2612133a3
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_006.test
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-infer with post process script
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-infer
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="sample.tvn"
+
+if [[ ! -s "${inputfile}" ]]; then
+ touch ${inputfile}
+fi
+
+# copy dummy-infer to bin folder
+cp dummy-infer ../bin/dummy-infer
+
+# run test
+one-infer -b dummy --post-process "./one-infer-test-post-process.py TOKEN" -- ${inputfile} > ${filename}.log 2>&1
+return_code=$?
+
+rm -rf ../bin/dummy-infer
+
+if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
+ if [ "$return_code" -eq "0" ]; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/one-infer_neg_001.test b/compiler/one-cmds/tests/one-infer_neg_001.test
new file mode 100644
index 000000000..62e721128
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_neg_001.test
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with no input
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  if grep -q "error: the following arguments are required: {-d/--driver | -b/--backend}" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+# run test
+one-infer > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-infer_neg_002.test b/compiler/one-cmds/tests/one-infer_neg_002.test
new file mode 100644
index 000000000..fa88876e8
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_neg_002.test
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# passed driver is not found
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+driver_name="neg-infer"
+
+trap_err_onexit()
+{
+  if grep -q "FileNotFoundError: ${driver_name} not found" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+# run test
+one-infer -d ${driver_name} -- -h> ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-infer_neg_003.test b/compiler/one-cmds/tests/one-infer_neg_003.test
new file mode 100644
index 000000000..a0005520f
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_neg_003.test
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# passed backend is not found
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+backend_name="neg"
+
+trap_err_onexit()
+{
+  if grep -q "FileNotFoundError: ${backend_name}-infer not found" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+# run test
+one-infer -b ${backend_name} -- -h> ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-infer_neg_004.test b/compiler/one-cmds/tests/one-infer_neg_004.test
new file mode 100644
index 000000000..b9130d051
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_neg_004.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# both -b and -d option drivers are given as argument
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+backend_name="neg"
+driver_name="neg2"
+
+trap_err_onexit()
+{
+  if grep -q "\-d and -b options are mutually exclusive. Please use only one of them" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+# run test
+one-infer -d ${driver_name} -b ${backend_name} -- -h> ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-infer_neg_005.test b/compiler/one-cmds/tests/one-infer_neg_005.test
new file mode 100644
index 000000000..9074debcf
--- /dev/null
+++ b/compiler/one-cmds/tests/one-infer_neg_005.test
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-infer with invalid post process script
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ return_code=$?
+  if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
+ # Case of succeed of inference driver but error after it
+ if [ "$return_code" -ne "0" ]; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+ fi
+
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-infer
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="sample.tvn"
+
+if [[ ! -s "${inputfile}" ]]; then
+ touch ${inputfile}
+fi
+
+# copy dummy-infer to bin folder
+cp dummy-infer ../bin/dummy-infer
+
+# run test
+one-infer -b dummy --post-process "./one-infer-test-post-process.py" -- ${inputfile} > ${filename}.log 2>&1
+
+rm -rf ../bin/dummy-infer
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-optimize_001.test b/compiler/one-cmds/tests/one-optimize_001.test
index 8eb58f4eb..4152fa3dd 100644
--- a/compiler/one-cmds/tests/one-optimize_001.test
+++ b/compiler/one-cmds/tests/one-optimize_001.test
@@ -40,7 +40,7 @@ if [[ ! -s ${inputfile} ]]; then
fi
# run test
-one-optimize --O1 \
+one-optimize --resolve_customop_add \
--input_path ${inputfile} \
--output_path ${outputfile} > /dev/null 2>&1
diff --git a/compiler/one-cmds/tests/one-optimize_002.test b/compiler/one-cmds/tests/one-optimize_002.test
index bd64494be..58f792bf8 100644
--- a/compiler/one-cmds/tests/one-optimize_002.test
+++ b/compiler/one-cmds/tests/one-optimize_002.test
@@ -40,7 +40,7 @@ if [[ ! -s ${inputfile} ]]; then
fi
# run test
-one-optimize --O1 \
+one-optimize --resolve_customop_add \
--change_outputs InceptionV3/Logits/SpatialSqueeze1 \
--input_path ${inputfile} \
--output_path ${outputfile} > /dev/null 2>&1
diff --git a/compiler/one-cmds/tests/one-optimize_neg_001.test b/compiler/one-cmds/tests/one-optimize_neg_001.test
index f0b5563c7..c67e3d489 100644
--- a/compiler/one-cmds/tests/one-optimize_neg_001.test
+++ b/compiler/one-cmds/tests/one-optimize_neg_001.test
@@ -39,7 +39,7 @@ rm -rf ${outputfile}
rm -rf ${outputfile}.log
# run test
-one-optimize --O1 \
+one-optimize --resolve_customop_add \
--input_path ${inputfile} \
--output_path ${outputfile} > ${filename}.log 2>&1
diff --git a/compiler/one-cmds/tests/one-optimize_neg_002.test b/compiler/one-cmds/tests/one-optimize_neg_002.test
index 72f306e20..a1ef70216 100644
--- a/compiler/one-cmds/tests/one-optimize_neg_002.test
+++ b/compiler/one-cmds/tests/one-optimize_neg_002.test
@@ -39,7 +39,7 @@ rm -rf ${outputfile}
rm -rf ${outputfile}.log
# run test
-one-optimize --O1 \
+one-optimize --resolve_customop_add \
--input_path ${inputfile} \
--output_path ${outputfile} > ${filename}.log 2>&1
diff --git a/compiler/one-cmds/tests/one-optimize_neg_003.test b/compiler/one-cmds/tests/one-optimize_neg_003.test
index 3fe7d330e..668a6c29d 100644
--- a/compiler/one-cmds/tests/one-optimize_neg_003.test
+++ b/compiler/one-cmds/tests/one-optimize_neg_003.test
@@ -44,7 +44,7 @@ if [[ ! -s ${inputfile} ]]; then
fi
# run test
-one-optimize --O1 \
+one-optimize --resolve_customop_add \
--input_path "${inputfile}" > "${filename}.log" 2>&1
echo "${filename_ext} FAILED"
diff --git a/compiler/one-cmds/tests/one-optimize_neg_004.test b/compiler/one-cmds/tests/one-optimize_neg_004.test
index e73911b54..5abd4c553 100644
--- a/compiler/one-cmds/tests/one-optimize_neg_004.test
+++ b/compiler/one-cmds/tests/one-optimize_neg_004.test
@@ -39,7 +39,7 @@ rm -rf ${outputfile}
rm -rf ${filename}.log
# run test
-one-optimize --O1 \
+one-optimize --resolve_customop_add \
--change_outputs non_existing_node_name \
--input_path ${inputfile} \
--output_path ${outputfile} > ${filename}.log 2>&1
diff --git a/compiler/one-cmds/tests/one-partition_001.test b/compiler/one-cmds/tests/one-partition_001.test
new file mode 100644
index 000000000..a6fba07d7
--- /dev/null
+++ b/compiler/one-cmds/tests/one-partition_001.test
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+testmodel="Net_InstanceNorm_003"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="${testmodel}.circle"
+partfile="${testmodel}.part"
+outputfile="${testmodel}.conn.json"
+
+rm -rf ${testmodel}.000*
+rm -rf ${testmodel}.conn.*
+rm -rf ${testmodel}.*.log
+
+# run test
+one-partition \
+--input_file ${inputfile} \
+--part_file ${partfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-partition_neg_001.test b/compiler/one-cmds/tests/one-partition_neg_001.test
new file mode 100644
index 000000000..d54a94fa2
--- /dev/null
+++ b/compiler/one-cmds/tests/one-partition_neg_001.test
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid .part file (wrong comply value)
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+testmodel="Net_InstanceNorm_003"
+
+trap_err_onexit()
+{
+ if grep -q "ERROR" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="${testmodel}.circle"
+partfile="${testmodel}.neg.part"
+outputfile="${testmodel}.conn.json"
+
+rm -rf ${testmodel}.000*
+rm -rf ${testmodel}.conn.*
+rm -rf ${testmodel}.*.log
+rm -rf ${filename}.log
+
+# run test
+one-partition \
+--input_file ${inputfile} \
+--part_file ${partfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-partition_neg_002.test b/compiler/one-cmds/tests/one-partition_neg_002.test
new file mode 100644
index 000000000..23fe84c05
--- /dev/null
+++ b/compiler/one-cmds/tests/one-partition_neg_002.test
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with invalid .cfg file (no one-partition section)
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+testmodel="Net_InstanceNorm_003"
+
+trap_err_onexit()
+{
+ if grep -q "'one-partition' section" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+cfgfile="${testmodel}.neg.cfg"
+
+rm -rf ${testmodel}.000*
+rm -rf ${testmodel}.conn.*
+rm -rf ${testmodel}.*.log
+rm -rf ${filename}.log
+
+# run test
+one-partition -C ${cfgfile}> ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/one-quantize_010.test b/compiler/one-cmds/tests/one-quantize_010.test
new file mode 100644
index 000000000..1095ba0a0
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_010.test
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+check_message()
+{
+ if grep -q "MPEIR for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ trap_err_onexit
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.one-quantize_010.q.circle"
+datafile="./inception_v3_test_data.h5"
+
+rm -rf ${outputfile}
+
+# to create inception_v3.circle
+if [[ ! -s ${inputfile} ]]; then
+ /bin/bash one-import_001.test > /dev/null 2>&1
+ return_code=$?
+ if [[ ${return_code} != 0 ]]; then
+ trap_err_onexit
+ fi
+fi
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--granularity channel \
+--input_path ${inputfile} \
+--input_data ${datafile} \
+--output_path ${outputfile} \
+--evaluate_result \
+--test_data ${datafile} \
+--print_mpeir > ${filename}.log 2>&1
+
+check_message
diff --git a/compiler/one-cmds/tests/one-quantize_011.test b/compiler/one-cmds/tests/one-quantize_011.test
new file mode 100644
index 000000000..34d7f57b5
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_011.test
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+check_message()
+{
+ if grep -q "Mean Top-5 match ratio for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ trap_err_onexit
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.one-quantize_011.q.circle"
+datafile="./inception_v3_test_data.h5"
+
+rm -rf ${outputfile}
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--granularity channel \
+--input_path ${inputfile} \
+--input_data ${datafile} \
+--output_path ${outputfile} \
+--evaluate_result \
+--test_data ${datafile} \
+--print_top5_match > ${filename}.log 2>&1
+
+check_message
diff --git a/compiler/one-cmds/tests/one-quantize_012.qconf.json b/compiler/one-cmds/tests/one-quantize_012.qconf.json
new file mode 100644
index 000000000..4a15b04f5
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_012.qconf.json
@@ -0,0 +1,16 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "names" : ["InceptionV3/InceptionV3/Conv2d_2b_3x3/Relu;InceptionV3/InceptionV3/Conv2d_2b_3x3/BatchNorm/FusedBatchNorm;InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0a_1x1/Conv2D;InceptionV3/InceptionV3/Conv2d_2b_3x3/Conv2D",
+ "InceptionV3/InceptionV3/MaxPool_5a_3x3/MaxPool",
+ "InceptionV3/InceptionV3/Mixed_5b/concat",
+ "InceptionV3/InceptionV3/Mixed_5b/Branch_3/AvgPool_0a_3x3/AvgPool",
+ "InceptionV3/InceptionV3/Mixed_7c/concat",
+ "InceptionV3/Predictions/Reshape_1"],
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/one-cmds/tests/one-quantize_012.test b/compiler/one-cmds/tests/one-quantize_012.test
new file mode 100644
index 000000000..fba18acc5
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_012.test
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.one-quantize_012.q.circle"
+
+rm -rf ${outputfile}
+
+# run test without input data
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--granularity channel \
+--quant_config one-quantize_012.qconf.json \
+--input_path ${inputfile} \
+--output_path ${outputfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_013.qconf.json b/compiler/one-cmds/tests/one-quantize_013.qconf.json
new file mode 100644
index 000000000..4a15b04f5
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_013.qconf.json
@@ -0,0 +1,16 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "names" : ["InceptionV3/InceptionV3/Conv2d_2b_3x3/Relu;InceptionV3/InceptionV3/Conv2d_2b_3x3/BatchNorm/FusedBatchNorm;InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0a_1x1/Conv2D;InceptionV3/InceptionV3/Conv2d_2b_3x3/Conv2D",
+ "InceptionV3/InceptionV3/MaxPool_5a_3x3/MaxPool",
+ "InceptionV3/InceptionV3/Mixed_5b/concat",
+ "InceptionV3/InceptionV3/Mixed_5b/Branch_3/AvgPool_0a_3x3/AvgPool",
+ "InceptionV3/InceptionV3/Mixed_7c/concat",
+ "InceptionV3/Predictions/Reshape_1"],
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/compiler/one-cmds/tests/one-quantize_013.test b/compiler/one-cmds/tests/one-quantize_013.test
new file mode 100644
index 000000000..fd443d627
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_013.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# quantized_dtype and granularity are given by qconfig file
+# (not by command line interface)
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.one-quantize_013.q.circle"
+
+rm -rf ${outputfile}
+
+# run test without input data
+# quantized_dtype and granularity are not given here
+one-quantize \
+--input_dtype float32 \
+--quant_config one-quantize_013.qconf.json \
+--input_path ${inputfile} \
+--output_path ${outputfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_014.test b/compiler/one-cmds/tests/one-quantize_014.test
new file mode 100644
index 000000000..518c32841
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_014.test
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test if `circle-eval-diff` supports directory input.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+check_message()
+{
+ if grep -q "Mean Top-5 match ratio for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ trap_err_onexit
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.one-quantize_014.q.circle"
+datadir="./raw_files/"
+
+rm -rf ${outputfile}
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quantized_dtype uint8 \
+--granularity channel \
+--input_path ${inputfile} \
+--input_data ${datadir} \
+--input_data_format dir \
+--output_path ${outputfile} \
+--evaluate_result \
+--test_data ${datadir} \
+--print_top5_match > ${filename}.log 2>&1
+
+check_message
diff --git a/compiler/one-cmds/tests/one-quantize_015.test b/compiler/one-cmds/tests/one-quantize_015.test
new file mode 100644
index 000000000..bb45b5722
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_015.test
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test if --fake_quantize option works well
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.mat.q8.circle"
+outputfile="./inception_v3.one-quantize_015.fq.circle"
+
+rm -rf ${outputfile}
+
+# run test
+one-quantize \
+--fake_quantize \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_neg_019.test b/compiler/one-cmds/tests/one-quantize_neg_019.test
index ac920a4fe..e182edf78 100644
--- a/compiler/one-cmds/tests/one-quantize_neg_019.test
+++ b/compiler/one-cmds/tests/one-quantize_neg_019.test
@@ -42,7 +42,7 @@ one-quantize \
--input_dtype float32 \
--quantized_dtype int16 \
--granularity channel \
---input_type float32 \
+--input_type float64 \
--input_path ${inputfile} \
--output_path ${outputfile} > ${filename}.log 2>&1
diff --git a/compiler/one-cmds/tests/one-quantize_neg_020.test b/compiler/one-cmds/tests/one-quantize_neg_020.test
new file mode 100644
index 000000000..27b11c3e6
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_020.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# check error message is printed when qconfig file is not json
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Failed to decode" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.quantized.neg_020.circle"
+
+rm -rf ${outputfile}.log
+
+# run test
+one-quantize \
+--input_dtype float32 \
+--quant_config one-quantize_neg_020.test \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_008.cfg b/compiler/one-cmds/tests/onecc_008.cfg
index 0be026e6e..020e274e1 100644
--- a/compiler/one-cmds/tests/onecc_008.cfg
+++ b/compiler/one-cmds/tests/onecc_008.cfg
@@ -15,7 +15,6 @@ output_path=test_onnx_model.circle
[one-optimize]
input_path=test_onnx_model.circle
output_path=test_onnx_model.opt.circle
-all=True
remove_redundant_transpose=True
[one-codegen]
diff --git a/compiler/one-cmds/tests/onecc_009.cfg b/compiler/one-cmds/tests/onecc_009.cfg
index a17ae59cb..86121c557 100644
--- a/compiler/one-cmds/tests/onecc_009.cfg
+++ b/compiler/one-cmds/tests/onecc_009.cfg
@@ -15,7 +15,6 @@ output_path=onnx_conv2d_conv2d.circle
[one-optimize]
input_path=onnx_conv2d_conv2d.circle
output_path=onnx_conv2d_conv2d.opt.circle
-all=True
remove_redundant_transpose=True
convert_nchw_to_nhwc=True
diff --git a/compiler/one-cmds/tests/onecc_024.cfg b/compiler/one-cmds/tests/onecc_024.cfg
new file mode 100644
index 000000000..7b4b1a80a
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_024.cfg
@@ -0,0 +1,22 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v1
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
+make_batchnorm_gamma_positive=False
diff --git a/compiler/one-cmds/tests/onecc_024.test b/compiler/one-cmds/tests/onecc_024.test
new file mode 100644
index 000000000..1f5daa13e
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_024.test
@@ -0,0 +1,77 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Use `OONECC_024` optimization option
+
+: '
+This test assumes below directories.
+
+[one hierarchy]
+ one
+ ├── backends
+ ├── bin
+ ├── doc
+ ├── include
+ ├── lib
+ ├── optimization
+ └── test # pwd
+'
+
+OPT_ALREADY_EXIST=true
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+clean_envir()
+{
+ rm -rf ../optimization/OONECC_024.cfg
+ if [ "$OPT_ALREADY_EXIST" = false ]; then
+ rm -rf ../optimization
+ fi
+}
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ clean_envir
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_024.cfg"
+outputfile="inception_v3.opt.circle"
+
+rm -rf ${outputfile}
+
+if [ ! -d "../optimization" ]; then
+ mkdir -p ../optimization
+ OPT_ALREADY_EXIST=false
+fi
+
+cp OONECC_024.cfg ../optimization
+
+# run test
+LUCI_LOG=5 onecc -C ${configfile} -OONECC_024 > ${filename}.log 2>&1
+
+clean_envir
+
+if grep -q "MakeBatchNormGammaPositivePass" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/onecc_025.cfg b/compiler/one-cmds/tests/onecc_025.cfg
new file mode 100644
index 000000000..4776ea80e
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_025.cfg
@@ -0,0 +1,20 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
diff --git a/compiler/one-cmds/tests/onecc_025.test b/compiler/one-cmds/tests/onecc_025.test
new file mode 100644
index 000000000..396f40cea
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_025.test
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-import-tf -> one-optimize with the configuration file that includes `onecc` section
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_001.cfg"
+outputfile="inception_v3.opt.circle"
+
+# run test
+onecc -C ${configfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_026.cfg b/compiler/one-cmds/tests/onecc_026.cfg
new file mode 100644
index 000000000..c27a13654
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_026.cfg
@@ -0,0 +1,16 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=True
+one-pack=False
+one-codegen=False
+
+[one-quantize]
+input_path=inception_v3.circle
+output_path=inception_v3.onecc_026.q.circle
+input_data=inception_v3_test_data.h5
+evaluate_result=True
+test_data=inception_v3_test_data.h5
+print_mpeir=True
diff --git a/compiler/one-cmds/tests/onecc_026.test b/compiler/one-cmds/tests/onecc_026.test
new file mode 100644
index 000000000..84cfa4146
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_026.test
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+check_message()
+{
+ if grep -q "MPEIR for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ trap_err_onexit
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_026.cfg"
+outputfile="inception_v3.onecc_026.q.circle"
+
+rm -rf ${outputfile}
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+check_message
diff --git a/compiler/one-cmds/tests/onecc_027.cfg b/compiler/one-cmds/tests/onecc_027.cfg
new file mode 100644
index 000000000..d3f6b5e82
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_027.cfg
@@ -0,0 +1,15 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+one-profile=False
+one-infer=True
+
+[one-infer]
+backend=dummy
+command=test_onnx_model.bin
diff --git a/compiler/one-cmds/tests/onecc_027.test b/compiler/one-cmds/tests/onecc_027.test
new file mode 100644
index 000000000..e727359ef
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_027.test
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-infer
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-profile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_027.cfg"
+
+# copy dummy-infer to bin folder
+cp dummy-infer ../bin/dummy-infer
+
+# run test
+onecc -C ${configfile} > ${filename}.log
+
+rm -rf ../bin/dummy-infer
+
+if grep -q "dummy-infer dummy output!!!" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+fi
+
+trap_err_onexit
diff --git a/compiler/one-cmds/tests/onecc_028.test b/compiler/one-cmds/tests/onecc_028.test
new file mode 100644
index 000000000..10ce1583b
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_028.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tf -> one-optimize -> one-pack
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_028.workflow.json"
+outputfile="inception_v3_pkg"
+
+rm -rf ${outputfile}
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_028.workflow.json b/compiler/one-cmds/tests/onecc_028.workflow.json
new file mode 100644
index 000000000..84bfd01fa
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_028.workflow.json
@@ -0,0 +1,37 @@
+{
+ "workflows": [
+ "MY_WORKFLOW"
+ ],
+ "MY_WORKFLOW": {
+ "steps": [
+ "IMPORT_TF",
+ "OPTIMIZE",
+ "PACK"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "OPTIMIZE": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.opt.circle"
+ }
+ },
+ "PACK": {
+ "one-cmd": "one-pack",
+ "commands": {
+ "input_path": "inception_v3.opt.circle",
+ "output_path": "inception_v3_pkg"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_029.test b/compiler/one-cmds/tests/onecc_029.test
new file mode 100644
index 000000000..9bab1a1ee
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_029.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tf -> one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_029.workflow.json"
+outputfile="inception_v3.quantized.circle"
+
+rm -rf ${outputfile}
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_029.workflow.json b/compiler/one-cmds/tests/onecc_029.workflow.json
new file mode 100644
index 000000000..65c9ea662
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_029.workflow.json
@@ -0,0 +1,30 @@
+{
+ "workflows": [
+ "QUANTIZE_WORKFLOW"
+ ],
+ "QUANTIZE_WORKFLOW": {
+ "steps": [
+ "IMPORT_TF",
+ "QUANTIZE"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "QUANTIZE": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.quantized.circle",
+ "input_data": "inception_v3_test_data.h5"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_030.test b/compiler/one-cmds/tests/onecc_030.test
new file mode 100644
index 000000000..c0aa56a51
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_030.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tf -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_030.workflow.json"
+outputfile="sample.tvn"
+
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_030.workflow.json b/compiler/one-cmds/tests/onecc_030.workflow.json
new file mode 100644
index 000000000..111a1b034
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_030.workflow.json
@@ -0,0 +1,29 @@
+{
+ "workflows": [
+ "codegen_wf"
+ ],
+ "codegen_wf": {
+ "steps": [
+ "import_tf",
+ "codegen"
+ ],
+ "import_tf": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "codegen": {
+ "one-cmd": "one-codegen",
+ "commands": {
+ "backend": "dummy",
+ "command": "-o sample.tvn inception_v3.circle"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_031.test b/compiler/one-cmds/tests/onecc_031.test
new file mode 100644
index 000000000..7a1c670c8
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_031.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tflite -> one-optimize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_031.workflow.json"
+outputfile="sample.tvn"
+
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_031.workflow.json b/compiler/one-cmds/tests/onecc_031.workflow.json
new file mode 100644
index 000000000..83d52b942
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_031.workflow.json
@@ -0,0 +1,33 @@
+{
+ "workflows": [
+ "wf"
+ ],
+ "wf": {
+ "steps": [
+ "import",
+ "optimize",
+ "codegen"
+ ],
+ "import": {
+ "one-cmd": "one-import-tflite",
+ "commands": {
+ "input_path": "inception_v3.tflite",
+ "output_path": "inception_v3.circle"
+ }
+ },
+ "optimize": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.opt.circle"
+ }
+ },
+ "codegen": {
+ "one-cmd": "one-codegen",
+ "commands": {
+ "backend": "dummy",
+ "command": "-o sample.tvn inception_v3.opt.circle"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_032.test b/compiler/one-cmds/tests/onecc_032.test
new file mode 100644
index 000000000..89b6c41a5
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_032.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tflite -> one-optimize -> one-quantize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_032.workflow.json"
+outputfile="sample.tvn"
+
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_032.workflow.json b/compiler/one-cmds/tests/onecc_032.workflow.json
new file mode 100644
index 000000000..08d3f0f5c
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_032.workflow.json
@@ -0,0 +1,42 @@
+{
+ "workflows": [
+ "wf"
+ ],
+ "wf": {
+ "steps": [
+ "import",
+ "optimize",
+ "quantize",
+ "codegen"
+ ],
+ "import": {
+ "one-cmd": "one-import-tflite",
+ "commands": {
+ "input_path": "inception_v3.tflite",
+ "output_path": "inception_v3.circle"
+ }
+ },
+ "optimize": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.opt.circle"
+ }
+ },
+ "quantize": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.quantized.circle",
+ "input_data": "inception_v3_test_data.h5"
+ }
+ },
+ "codegen": {
+ "one-cmd": "one-codegen",
+ "commands": {
+ "backend": "dummy",
+ "command": "-o sample.tvn inception_v3.quantized.circle"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_033.test b/compiler/one-cmds/tests/onecc_033.test
new file mode 100644
index 000000000..635582f61
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_033.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tflite -> one-optimize -> one-quantize -> one-pack
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_033.workflow.json"
+outputfile="inception_v3_pkg"
+
+rm -rf ${outputfile}
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_033.workflow.json b/compiler/one-cmds/tests/onecc_033.workflow.json
new file mode 100644
index 000000000..01233ffd9
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_033.workflow.json
@@ -0,0 +1,42 @@
+{
+ "workflows": [
+ "wf"
+ ],
+ "wf": {
+ "steps": [
+ "import",
+ "optimize",
+ "quantize",
+ "pack"
+ ],
+ "import": {
+ "one-cmd": "one-import-tflite",
+ "commands": {
+ "input_path": "inception_v3.tflite",
+ "output_path": "inception_v3.circle"
+ }
+ },
+ "optimize": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.opt.circle"
+ }
+ },
+ "quantize": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.quantized.circle",
+ "input_data": "inception_v3_test_data.h5"
+ }
+ },
+ "pack": {
+ "one-cmd": "one-pack",
+ "commands": {
+ "input_path": "inception_v3.quantized.circle",
+ "output_path": "inception_v3_pkg"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_034.test b/compiler/one-cmds/tests/onecc_034.test
new file mode 100644
index 000000000..e76654809
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_034.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-onnx -> one-optimize -> one-codegen
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ rm -rf ../bin/dummy-compile
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_034.workflow.json"
+outputfile="onnx_conv2d_conv2d.bin"
+
+rm -rf ${outputfile}
+
+# copy dummy-compile to bin folder
+cp dummy-compile ../bin/dummy-compile
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+rm -rf ../bin/dummy-compile
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_034.workflow.json b/compiler/one-cmds/tests/onecc_034.workflow.json
new file mode 100644
index 000000000..bc3cbbf58
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_034.workflow.json
@@ -0,0 +1,35 @@
+{
+ "workflows": [
+ "wf"
+ ],
+ "wf": {
+ "steps": [
+ "import",
+ "optimize",
+ "codegen"
+ ],
+ "import": {
+ "one-cmd": "one-import-onnx",
+ "commands": {
+ "input_path": "onnx_conv2d_conv2d.onnx",
+ "output_path": "onnx_conv2d_conv2d.circle"
+ }
+ },
+ "optimize": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "onnx_conv2d_conv2d.circle",
+ "output_path": "onnx_conv2d_conv2d.opt.circle",
+ "remove_redundant_transpose": "True",
+ "convert_nchw_to_nhwc": "True"
+ }
+ },
+ "codegen": {
+ "one-cmd": "one-codegen",
+ "commands": {
+ "backend": "dummy",
+ "command": "-o onnx_conv2d_conv2d.bin onnx_conv2d_conv2d.opt.circle"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_035.test b/compiler/one-cmds/tests/onecc_035.test
new file mode 100644
index 000000000..762cdd31a
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_035.test
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tf generates intermediate files
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_035.workflow.json"
+outputfile="inception_v3.alt.circle"
+intermfile="inception_v3.alt.tflite"
+
+rm -rf ${outputfile}
+rm -rf ${intermfile}
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+if [[ ! -s "${intermfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_035.workflow.json b/compiler/one-cmds/tests/onecc_035.workflow.json
new file mode 100644
index 000000000..6abf1f32b
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_035.workflow.json
@@ -0,0 +1,22 @@
+{
+ "workflows": [
+ "wf"
+ ],
+ "wf": {
+ "steps": [
+ "import"
+ ],
+ "import": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.alt.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v1",
+ "save_intermediate": "True"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_036.test b/compiler/one-cmds/tests/onecc_036.test
new file mode 100644
index 000000000..865255e9f
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_036.test
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-onnx generates intermediate files
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_036.workflow.json"
+outputfile="test_onnx_model.circle"
+intermfile="test_onnx_model.tflite"
+
+rm -rf ${outputfile}
+rm -rf ${intermfile}
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+if [[ ! -s "${intermfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_036.workflow.json b/compiler/one-cmds/tests/onecc_036.workflow.json
new file mode 100644
index 000000000..5fa29edb5
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_036.workflow.json
@@ -0,0 +1,18 @@
+{
+ "workflows": [
+ "wf"
+ ],
+ "wf": {
+ "steps": [
+ "import"
+ ],
+ "import": {
+ "one-cmd": "one-import-onnx",
+ "commands": {
+ "input_path": "test_onnx_model.onnx",
+ "output_path": "test_onnx_model.circle",
+ "save_intermediate": "True"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_037.test b/compiler/one-cmds/tests/onecc_037.test
new file mode 100644
index 000000000..52ea9e4c7
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_037.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tf -> one-optimize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_037.workflow.json"
+outputfile="inception_v3.opt.circle"
+
+rm -rf ${outputfile}
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_037.workflow.json b/compiler/one-cmds/tests/onecc_037.workflow.json
new file mode 100644
index 000000000..3317fb27a
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_037.workflow.json
@@ -0,0 +1,29 @@
+{
+ "workflows": [
+ "SIMPLE_WORKFLOW"
+ ],
+ "SIMPLE_WORKFLOW": {
+ "steps": [
+ "IMPORT",
+ "OPTIMIZE"
+ ],
+ "IMPORT": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "OPTIMIZE": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.opt.circle"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_038.test b/compiler/one-cmds/tests/onecc_038.test
new file mode 100644
index 000000000..6b8f7cf64
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_038.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-import-tf -> one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_038.workflow.json"
+outputfile="inception_v3.list.quantized.circle"
+
+rm -rf ${outputfile}
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_038.workflow.json b/compiler/one-cmds/tests/onecc_038.workflow.json
new file mode 100644
index 000000000..5ac515d00
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_038.workflow.json
@@ -0,0 +1,31 @@
+{
+ "workflows": [
+ "SIMPLE_WORKFLOW"
+ ],
+ "SIMPLE_WORKFLOW": {
+ "steps": [
+ "IMPORT",
+ "QUANTIZE"
+ ],
+ "IMPORT": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "QUANTIZE": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.list.quantized.circle",
+ "input_data": "datalist.txt",
+ "input_data_format": "list"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_039.test b/compiler/one-cmds/tests/onecc_039.test
new file mode 100644
index 000000000..7db9d901c
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_039.test
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow where one-quantize quantizes the model and evaluates the result
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+check_message()
+{
+  if grep -q "MPEIR for InceptionV3/Predictions/Reshape_1 is" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ trap_err_onexit
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_039.workflow.json"
+outputfile="inception_v3.onecc_039.q.circle"
+
+rm -rf ${outputfile}
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+check_message
diff --git a/compiler/one-cmds/tests/onecc_039.workflow.json b/compiler/one-cmds/tests/onecc_039.workflow.json
new file mode 100644
index 000000000..55ef56988
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_039.workflow.json
@@ -0,0 +1,21 @@
+{
+ "workflows": [
+ "SIMPLE_WORKFLOW"
+ ],
+ "SIMPLE_WORKFLOW": {
+ "steps": [
+ "QUANTIZE"
+ ],
+ "QUANTIZE": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+                "output_path": "inception_v3.onecc_039.q.circle",
+ "input_data": "inception_v3_test_data.h5",
+ "evaluate_result": "True",
+ "test_data": "inception_v3_test_data.h5",
+ "print_mpeir": "True"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_040.cfg b/compiler/one-cmds/tests/onecc_040.cfg
new file mode 100644
index 000000000..4776ea80e
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_040.cfg
@@ -0,0 +1,20 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
diff --git a/compiler/one-cmds/tests/onecc_040.test b/compiler/one-cmds/tests/onecc_040.test
new file mode 100644
index 000000000..2f7567730
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_040.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run a workflow with cfg reference
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_040.workflow.json"
+outputfile="inception_v3.opt.circle"
+
+rm -rf ${outputfile}
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onecc_040.workflow.json b/compiler/one-cmds/tests/onecc_040.workflow.json
new file mode 100644
index 000000000..2d4119b21
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_040.workflow.json
@@ -0,0 +1,10 @@
+{
+ "workflows": [
+ "MY_WORKFLOW"
+ ],
+ "MY_WORKFLOW": {
+ "cfg-reference": {
+ "path": "onecc_040.cfg"
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_041.cfg b/compiler/one-cmds/tests/onecc_041.cfg
new file mode 100644
index 000000000..16135f074
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_041.cfg
@@ -0,0 +1,16 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3_without_opt.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
diff --git a/compiler/one-cmds/tests/onecc_041.test b/compiler/one-cmds/tests/onecc_041.test
new file mode 100644
index 000000000..791dd12ca
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_041.test
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# run workflows
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+check_message()
+{
+  if grep -q "Do inference of inception_v3_without_opt\.circle" "${filename}.log" &&
+    grep -q "Do inference of inception_v3\.opt\.circle" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ trap_err_onexit
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_041.workflow.json"
+outputfile1="inception_v3_without_opt.circle"
+outputfile2="inception_v3.opt.circle"
+
+cp dummy-inferV2 ../bin/dummy-inferV2
+
+rm -rf ${outputfile1} ${outputfile2}
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+rm -rf ../bin/dummy-inferV2
+
+if [[ ! -s "${outputfile1}" ]] || [[ ! -s "${outputfile2}" ]]; then
+ trap_err_onexit
+fi
+
+check_message
diff --git a/compiler/one-cmds/tests/onecc_041.workflow.json b/compiler/one-cmds/tests/onecc_041.workflow.json
new file mode 100644
index 000000000..7dfc1c664
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_041.workflow.json
@@ -0,0 +1,61 @@
+{
+ "workflows": [
+ "WITHOUT_OPT",
+ "WITH_OPT",
+ "INFER"
+ ],
+ "INFER": {
+ "run-after": [
+ "WITHOUT_OPT",
+ "WITH_OPT"
+ ],
+ "steps": [
+ "INFER1",
+ "INFER2"
+ ],
+ "INFER1": {
+ "one-cmd": "one-infer",
+ "commands" : {
+ "driver": "dummy-inferV2",
+ "command": "inception_v3_without_opt.circle"
+ }
+ },
+ "INFER2": {
+ "one-cmd": "one-infer",
+ "commands": {
+ "driver": "dummy-inferV2",
+ "command": "inception_v3.opt.circle"
+ }
+ }
+ },
+ "WITHOUT_OPT": {
+ "cfg-reference": {
+ "path": "onecc_041.cfg"
+ }
+ },
+ "WITH_OPT": {
+ "steps": [
+ "IMPORT_TF",
+ "OPTIMIZE"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "OPTIMIZE": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.opt.circle"
+ }
+ }
+ }
+
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_009.test b/compiler/one-cmds/tests/onecc_neg_009.test
new file mode 100644
index 000000000..54dd129e4
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_009.test
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Valid optimization option but invalid configuration file path
+
+: '
+This test assumes below directories.
+
+[one hierarchy]
+ one
+ ├── backends
+ ├── bin
+ ├── doc
+ ├── include
+ ├── lib
+ ├── optimization
+ └── test # pwd
+'
+
+OPT_ALREADY_EXIST=true
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ rm -rf ../optimization/OONECC_NEG_009.cfg
+ if [ "$OPT_ALREADY_EXIST" = false ]; then
+ rm -rf ../optimization
+ fi
+  if grep -q "Not found given configuration file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+if [ ! -d "../optimization" ]; then
+ mkdir -p ../optimization
+ OPT_ALREADY_EXIST=false
+fi
+
+
+touch ../optimization/OONECC_NEG_009.cfg
+
+configfile=".."
+
+# run test
+onecc -C ${configfile} -OONECC_NEG_009 > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_010.test b/compiler/one-cmds/tests/onecc_neg_010.test
new file mode 100644
index 000000000..ddad5e6de
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_010.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Invalid optimization option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  if grep -q "Invalid optimization option" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile=".."
+
+# run test
+onecc -C ${configfile} -OONECC_NEG_010 > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_011.cfg b/compiler/one-cmds/tests/onecc_neg_011.cfg
new file mode 100644
index 000000000..b5873245b
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_011.cfg
@@ -0,0 +1,13 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=True
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-optimize]
+input_path=inception_v3.circle
+output_path=inception_v3.opt.circle
+wrong_opt=True
diff --git a/compiler/one-cmds/tests/onecc_neg_011.test b/compiler/one-cmds/tests/onecc_neg_011.test
new file mode 100644
index 000000000..3f043a77e
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_011.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# generate error for unrecognized optimization option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  if grep -q "following arguments are unrecognized" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_011.cfg"
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_012.cfg b/compiler/one-cmds/tests/onecc_neg_012.cfg
new file mode 100644
index 000000000..fdc73ef43
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_012.cfg
@@ -0,0 +1,15 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+one-profile=False
+one-infer=True
+
+[one-infer]
+driver=dummy-infer
+backend=dummy
+command="dummy arguments"
diff --git a/compiler/one-cmds/tests/onecc_neg_012.test b/compiler/one-cmds/tests/onecc_neg_012.test
new file mode 100644
index 000000000..9feca5f54
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_012.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Check driver and backend option is mutually exclusive
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "\-d and -b options are mutually exclusive" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_neg_012.cfg"
+
+# run test
+onecc -C ${configfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_013.test b/compiler/one-cmds/tests/onecc_neg_013.test
new file mode 100644
index 000000000..0dd8a0fdd
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_013.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with missing workflow file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Not found given workflow file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_013.workflow.json"
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_014.test b/compiler/one-cmds/tests/onecc_neg_014.test
new file mode 100644
index 000000000..2ed5dcbf5
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_014.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# invalid workflow file
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Invalid workflow file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_014.workflow.json"
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_014.workflow.json b/compiler/one-cmds/tests/onecc_neg_014.workflow.json
new file mode 100644
index 000000000..8d4fd431e
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_014.workflow.json
@@ -0,0 +1,3 @@
+{
+ INVALID JSON FILE
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_015.test b/compiler/one-cmds/tests/onecc_neg_015.test
new file mode 100644
index 000000000..079ba677a
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_015.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflow file has invalid key
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Not found" "${filename}.log" &&
+ grep -q "key in workflow file" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_015.workflow.json"
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_015.workflow.json b/compiler/one-cmds/tests/onecc_neg_015.workflow.json
new file mode 100644
index 000000000..4cb752e4e
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_015.workflow.json
@@ -0,0 +1,21 @@
+{
+ "workflowsssssss": [
+ "SIMPLE_WORKFLOW"
+ ],
+ "SIMPLE_WORKFLOW": {
+ "steps": [
+ "QUANTIZE"
+ ],
+ "QUANTIZE": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.onecc_026.q.circle",
+ "input_data": "inception_v3_test_data.h5",
+ "evaluate_result": "True",
+ "test_data": "inception_v3_test_data.h5",
+ "print_mpeir": "True"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_016.test b/compiler/one-cmds/tests/onecc_neg_016.test
new file mode 100644
index 000000000..c52763f47
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_016.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflow file has invalid key
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Not found" "${filename}.log" &&
+ grep -q "key listed in" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_016.workflow.json"
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_016.workflow.json b/compiler/one-cmds/tests/onecc_neg_016.workflow.json
new file mode 100644
index 000000000..c929cf38c
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_016.workflow.json
@@ -0,0 +1,21 @@
+{
+ "workflows": [
+ "SIMPLE_WORKFLOW"
+ ],
+ "SIMPLE_WORKFLOWWWWW": {
+ "steps": [
+ "QUANTIZE"
+ ],
+ "QUANTIZE": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.onecc_026.q.circle",
+ "input_data": "inception_v3_test_data.h5",
+ "evaluate_result": "True",
+ "test_data": "inception_v3_test_data.h5",
+ "print_mpeir": "True"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_017.test b/compiler/one-cmds/tests/onecc_neg_017.test
new file mode 100644
index 000000000..2f173d2f6
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_017.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflow file has invalid key
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Each workflow should have either" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_017.workflow.json"
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_017.workflow.json b/compiler/one-cmds/tests/onecc_neg_017.workflow.json
new file mode 100644
index 000000000..22f1415e9
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_017.workflow.json
@@ -0,0 +1,18 @@
+{
+ "workflows": [
+ "SIMPLE_WORKFLOW"
+ ],
+ "SIMPLE_WORKFLOW": {
+ "QUANTIZE": {
+ "one-cmd": "one-quantize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.onecc_026.q.circle",
+ "input_data": "inception_v3_test_data.h5",
+ "evaluate_result": "True",
+ "test_data": "inception_v3_test_data.h5",
+ "print_mpeir": "True"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_018.test b/compiler/one-cmds/tests/onecc_neg_018.test
new file mode 100644
index 000000000..bc2297ed0
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_018.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflow file has invalid key
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "are exclusive key" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_018.workflow.json"
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_018.workflow.json b/compiler/one-cmds/tests/onecc_neg_018.workflow.json
new file mode 100644
index 000000000..58cb88e17
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_018.workflow.json
@@ -0,0 +1,24 @@
+{
+ "workflows": [
+ "MY_WORKFLOW"
+ ],
+ "MY_WORKFLOW": {
+ "steps": [
+ "IMPORT_TF"
+ ],
+ "cfg-reference": {
+ "path": "/path/to/ini/format/file"
+ },
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_019.test b/compiler/one-cmds/tests/onecc_neg_019.test
new file mode 100644
index 000000000..11ef3a9ee
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_019.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflow file has invalid key
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Each step should have" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_019.workflow.json"
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_019.workflow.json b/compiler/one-cmds/tests/onecc_neg_019.workflow.json
new file mode 100644
index 000000000..aedeeecca
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_019.workflow.json
@@ -0,0 +1,21 @@
+{
+ "workflows": [
+ "MY_WORKFLOW"
+ ],
+ "MY_WORKFLOW": {
+ "steps": [
+ "IMPORT_TF"
+ ],
+ "IMPORT_TF": {
+ "one-cmddddddddd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_020.test b/compiler/one-cmds/tests/onecc_neg_020.test
new file mode 100644
index 000000000..7f5073d82
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_020.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflow file has invalid key
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Each step should have" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_020.workflow.json"
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_020.workflow.json b/compiler/one-cmds/tests/onecc_neg_020.workflow.json
new file mode 100644
index 000000000..d3446d38f
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_020.workflow.json
@@ -0,0 +1,21 @@
+{
+ "workflows": [
+ "MY_WORKFLOW"
+ ],
+ "MY_WORKFLOW": {
+ "steps": [
+ "IMPORT_TF"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commandssssssssss": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_021.test b/compiler/one-cmds/tests/onecc_neg_021.test
new file mode 100644
index 000000000..e9d4baaee
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_021.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflows have a cycle
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Workflows should not have a cycle" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_021.workflow.json"
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_021.workflow.json b/compiler/one-cmds/tests/onecc_neg_021.workflow.json
new file mode 100644
index 000000000..6d21111af
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_021.workflow.json
@@ -0,0 +1,44 @@
+{
+ "workflows": [
+ "CYCLE_WF1",
+ "CYCLE_WF2"
+ ],
+ "CYCLE_WF1": {
+ "run-after": [
+ "CYCLE_WF2"
+ ],
+ "steps": [
+ "IMPORT_TF"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ }
+ },
+ "CYCLE_WF2": {
+ "run-after": [
+ "CYCLE_WF1"
+ ],
+ "steps": [
+ "IMPORT_TF"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_022.cfg b/compiler/one-cmds/tests/onecc_neg_022.cfg
new file mode 100644
index 000000000..16135f074
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_022.cfg
@@ -0,0 +1,16 @@
+[onecc]
+one-import-tf=True
+one-import-tflite=False
+one-import-bcq=False
+one-optimize=False
+one-quantize=False
+one-pack=False
+one-codegen=False
+
+[one-import-tf]
+input_path=inception_v3.pb
+output_path=inception_v3_without_opt.circle
+input_arrays=input
+input_shapes=1,299,299,3
+output_arrays=InceptionV3/Predictions/Reshape_1
+converter_version=v2
diff --git a/compiler/one-cmds/tests/onecc_neg_022.test b/compiler/one-cmds/tests/onecc_neg_022.test
new file mode 100644
index 000000000..540071729
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_022.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflows have a cycle
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Workflows should not have a cycle" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_022.workflow.json"
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_022.workflow.json b/compiler/one-cmds/tests/onecc_neg_022.workflow.json
new file mode 100644
index 000000000..2e056acf1
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_022.workflow.json
@@ -0,0 +1,63 @@
+{
+ "workflows": [
+ "WITHOUT_OPT",
+ "WITH_OPT",
+ "INFER"
+ ],
+ "INFER": {
+ "run-after": [
+ "WITHOUT_OPT",
+ "WITH_OPT"
+ ],
+ "steps": [
+ "INFER1",
+ "INFER2"
+ ],
+ "INFER1": {
+ "one-cmd": "one-infer",
+ "commands" : {
+ "driver": "dummy-inferV2",
+ "command": "inception_v3_without_opt.circle"
+ }
+ },
+ "INFER2": {
+ "one-cmd": "one-infer",
+ "commands": {
+ "driver": "dummy-inferV2",
+ "command": "inception_v3.opt.circle"
+ }
+ }
+ },
+ "WITHOUT_OPT": {
+ "cfg-reference": {
+ "path": "onecc_041.cfg"
+ }
+ },
+ "WITH_OPT": {
+ "run-after": [
+ "WITHOUT_OPT"
+ ],
+ "steps": [
+ "IMPORT_TF",
+ "OPTIMIZE"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "OPTIMIZE": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.opt.circle"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/onecc_neg_023.test b/compiler/one-cmds/tests/onecc_neg_023.test
new file mode 100644
index 000000000..09717e8ad
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_023.test
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# workflows have wrong optimize option
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "Change outputs failed" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+workflowfile="onecc_neg_023.workflow.json"
+
+# run test
+onecc -W ${workflowfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_neg_023.workflow.json b/compiler/one-cmds/tests/onecc_neg_023.workflow.json
new file mode 100644
index 000000000..056e704fd
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_neg_023.workflow.json
@@ -0,0 +1,30 @@
+{
+ "workflows": [
+ "WITH_OPT"
+ ],
+ "WITH_OPT": {
+ "steps": [
+ "IMPORT_TF",
+ "OPTIMIZE"
+ ],
+ "IMPORT_TF": {
+ "one-cmd": "one-import-tf",
+ "commands": {
+ "input_path": "inception_v3.pb",
+ "output_path": "inception_v3.circle",
+ "input_arrays": "input",
+ "input_shapes": "1,299,299,3",
+ "output_arrays": "InceptionV3/Predictions/Reshape_1",
+ "converter_version": "v2"
+ }
+ },
+ "OPTIMIZE": {
+ "one-cmd": "one-optimize",
+ "commands": {
+ "input_path": "inception_v3.circle",
+ "output_path": "inception_v3.opt.circle",
+ "change_outputs": "non_existing_node_name"
+ }
+ }
+ }
+}
diff --git a/compiler/one-cmds/tests/prepare_test_materials.sh b/compiler/one-cmds/tests/prepare_test_materials.sh
index c80c59834..c171cfe01 100644
--- a/compiler/one-cmds/tests/prepare_test_materials.sh
+++ b/compiler/one-cmds/tests/prepare_test_materials.sh
@@ -91,6 +91,20 @@ if [[ ! -s "onnx_conv2d_conv2d.onnx" ]]; then
# https://github.com/Samsung/ONE/issues/5577#issuecomment-755078444
fi
+if [[ ! -s "reshape_matmul.onnx" ]]; then
+ rm -rf reshape_matmul.zip
+ wget https://github.com/Samsung/ONE/files/9082878/reshape_matmul.zip
+ unzip reshape_matmul.zip
+ # https://github.com/Samsung/ONE/issues/9405#issuecomment-1180198137
+fi
+
+if [[ ! -s "Net_InstanceNorm_003.part" ]]; then
+ rm -rf Net_InstanceNorm_003.zip
+ wget https://github.com/Samsung/ONE/files/8608844/Net_InstanceNorm_003.zip
+ unzip Net_InstanceNorm_003.zip
+ # https://github.com/Samsung/ONE/issues/8570#issuecomment-1115804257
+fi
+
function files_missing() {
condition="test "
diff --git a/compiler/one-cmds/utils.py b/compiler/one-cmds/utils.py
index be0322aca..d204447fd 100644
--- a/compiler/one-cmds/utils.py
+++ b/compiler/one-cmds/utils.py
@@ -47,6 +47,25 @@ def _add_default_arg(parser):
parser.add_argument('-S', '--section', type=str, help=argparse.SUPPRESS)
+def _add_default_arg_no_CS(parser):
+ """
+ This adds -v -V args only (no -C nor -S)
+ """
+ # version
+ parser.add_argument(
+ '-v',
+ '--version',
+ action='store_true',
+ help='show program\'s version number and exit')
+
+ # verbose
+ parser.add_argument(
+ '-V',
+ '--verbose',
+ action='store_true',
+ help='output additional information to stdout or stderr')
+
+
def is_accumulated_arg(arg, driver):
if driver == "one-quantize":
accumulables = [
@@ -62,6 +81,43 @@ def _is_valid_attr(args, attr):
return hasattr(args, attr) and getattr(args, attr)
+class Command:
+ def __init__(self, driver, args, log_file):
+ self.cmd = [driver]
+ self.driver = driver
+ self.args = args
+ self.log_file = log_file
+
+ # Add option if attrs are valid
+ # Option values are collected from self.args
+ def add_option_with_valid_args(self, option, attrs):
+ for attr in attrs:
+ if not _is_valid_attr(self.args, attr):
+ return self
+ self.cmd.append(option)
+ for attr in attrs:
+ self.cmd.append(getattr(self.args, attr))
+ return self
+
+ # Add option and values without any condition
+ def add_option_with_values(self, option, values):
+ self.cmd.append(option)
+ for value in values:
+ self.cmd.append(value)
+ return self
+
+ # Add option with no argument (ex: --verbose) if attr is valid
+ def add_noarg_option_if_valid_arg(self, option, attr):
+ if _is_valid_attr(self.args, attr):
+ self.cmd.append(option)
+ return self
+
+ # Run cmd and save logs
+ def run(self):
+ self.log_file.write((' '.join(self.cmd) + '\n').encode())
+ _run(self.cmd, err_prefix=self.driver, logfile=self.log_file)
+
+
def _parse_cfg_and_overwrite(config_path, section, args):
"""
parse given section of configuration file and set the values of args.
@@ -153,8 +209,7 @@ def _run(cmd, err_prefix=None, logfile=None):
err_prefix: prefix to be put before every stderr lines
logfile: file stream to which both of stdout and stderr lines will be written
"""
- with subprocess.Popen(
- cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=1) as p:
+ with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as p:
import select
inputs = set([p.stdout, p.stderr])
while inputs:
diff --git a/compiler/onnx-tools/CMakeLists.txt b/compiler/onnx-tools/CMakeLists.txt
index ac4500e0e..5935cdfbe 100644
--- a/compiler/onnx-tools/CMakeLists.txt
+++ b/compiler/onnx-tools/CMakeLists.txt
@@ -18,4 +18,10 @@ foreach(ONNX_TOOL IN ITEMS ${ONNX_TOOL_FILES})
add_custom_target(${ONNX_TOOL_TARGET} ALL DEPENDS ${ONNX_TOOL_BIN})
+ install(FILES ${ONNX_TOOL_BIN}
+ PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+ GROUP_READ GROUP_EXECUTE
+ WORLD_READ WORLD_EXECUTE
+ DESTINATION bin)
+
endforeach(ONNX_TOOL)
diff --git a/compiler/pota-quantization-value-test/CMakeLists.txt b/compiler/pota-quantization-value-test/CMakeLists.txt
index 51fd9a391..96dfc8687 100644
--- a/compiler/pota-quantization-value-test/CMakeLists.txt
+++ b/compiler/pota-quantization-value-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
unset(QUANTIZATION_VALUE_TEST)
unset(QUANTIZATION_VALUE_TEST_WITH_PARAM)
unset(QUANTIZATION_CONFIG_VALUE_TEST)
diff --git a/compiler/record-minmax-conversion-test/CMakeLists.txt b/compiler/record-minmax-conversion-test/CMakeLists.txt
index 31b906142..636361405 100644
--- a/compiler/record-minmax-conversion-test/CMakeLists.txt
+++ b/compiler/record-minmax-conversion-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
unset(RECORD_MINMAX_CONVERSION_TEST)
macro(addTest NAME)
diff --git a/compiler/record-minmax/driver/Driver.cpp b/compiler/record-minmax/driver/Driver.cpp
index c9f1d0ca7..faa402f01 100644
--- a/compiler/record-minmax/driver/Driver.cpp
+++ b/compiler/record-minmax/driver/Driver.cpp
@@ -34,62 +34,33 @@ int entry(const int argc, char **argv)
arser::Arser arser(
"Embedding min/max values of activations to the circle model for post-training quantization");
- arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
-
- arser.add_argument("-V", "--verbose")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("output additional information to stdout or stderr");
+ arser::Helper::add_version(arser, print_version);
+ arser::Helper::add_verbose(arser);
- arser.add_argument("--input_model")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(true)
- .help("Input model filepath");
+ arser.add_argument("--input_model").required(true).help("Input model filepath");
arser.add_argument("--input_data")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(false)
.help("Input data filepath. If not given, record-minmax will run with randomly generated data. "
"Note that the random dataset does not represent inference workload, leading to poor "
"model accuracy.");
- arser.add_argument("--output_model")
- .nargs(1)
- .type(arser::DataType::STR)
- .required(true)
- .help("Output model filepath");
+ arser.add_argument("--output_model").required(true).help("Output model filepath");
arser.add_argument("--min_percentile")
- .nargs(1)
.type(arser::DataType::FLOAT)
.help("Record n'th percentile of min");
arser.add_argument("--max_percentile")
- .nargs(1)
.type(arser::DataType::FLOAT)
.help("Record n'th percentile of max");
- arser.add_argument("--mode")
- .nargs(1)
- .type(arser::DataType::STR)
- .help("Record mode. percentile (default) or moving_average");
+ arser.add_argument("--mode").help("Record mode. percentile (default) or moving_average");
arser.add_argument("--input_data_format")
- .nargs(1)
- .type(arser::DataType::STR)
.help("Input data format. h5/hdf5 (default) or list/filelist");
arser.add_argument("--generate_profile_data")
.nargs(0)
- .required(false)
.default_value(false)
.help("This will turn on profiling data generation.");
diff --git a/compiler/record-minmax/include/RecordFunction.h b/compiler/record-minmax/include/RecordFunction.h
index ba199d071..5b993e4b3 100644
--- a/compiler/record-minmax/include/RecordFunction.h
+++ b/compiler/record-minmax/include/RecordFunction.h
@@ -18,7 +18,7 @@
#include <cassert>
#include <algorithm>
#include <cmath>
-#include <numeric>
+#include <limits>
#include <stdexcept>
namespace record_minmax
diff --git a/compiler/record-minmax/src/MinMaxObserver.cpp b/compiler/record-minmax/src/MinMaxObserver.cpp
index 8288d3e5e..e6edbdca9 100644
--- a/compiler/record-minmax/src/MinMaxObserver.cpp
+++ b/compiler/record-minmax/src/MinMaxObserver.cpp
@@ -18,6 +18,7 @@
#include <luci/IR/CircleOpcode.h>
+#include <limits>
#include <math.h>
using DataType = luci_interpreter::DataType;
@@ -75,7 +76,7 @@ void MinMaxObserver::postTensorWrite(const luci::CircleNode *node,
// Reshape changes only shape of input tensor, efficiently is it a no-op.
return;
default:
- throw std::runtime_error("Tensor's data type is not float");
+ throw std::runtime_error("Tensor's data type is not float. " + node->name());
}
}
diff --git a/compiler/record-minmax/src/RecordMinMax.cpp b/compiler/record-minmax/src/RecordMinMax.cpp
index 10a14516f..6dbf98dc6 100644
--- a/compiler/record-minmax/src/RecordMinMax.cpp
+++ b/compiler/record-minmax/src/RecordMinMax.cpp
@@ -186,7 +186,13 @@ void RecordMinMax::initialize(const std::string &input_model_path)
throw std::runtime_error("Failed to verify circle '" + input_model_path + "'");
}
- _module = luci::Importer().importModule(circle::GetModel(model_data.data()));
+ const circle::Model *circle_model = circle::GetModel(model_data.data());
+ if (circle_model == nullptr)
+ {
+ throw std::runtime_error("Failed to load '" + input_model_path + "'");
+ }
+
+ _module = luci::Importer().importModule(circle_model);
if (_module == nullptr)
{
diff --git a/compiler/souschef/CMakeLists.txt b/compiler/souschef/CMakeLists.txt
index f57102f1f..8dcf4c2b8 100644
--- a/compiler/souschef/CMakeLists.txt
+++ b/compiler/souschef/CMakeLists.txt
@@ -1,13 +1,20 @@
nnas_find_package(Protobuf QUIET)
+nnas_find_package(Fp16Source QUIET)
if(NOT Protobuf_FOUND)
message(STATUS "Build souschef: FAILED (missing Protobuf)")
return()
endif(NOT Protobuf_FOUND)
+if(NOT Fp16Source_FOUND)
+ message(STATUS "Build souschef: FAILED (missing Fp16Source)")
+ return()
+endif(NOT Fp16Source_FOUND)
+
file(GLOB_RECURSE SOURCES "src/*.cpp")
add_library(souschef STATIC ${SOURCES})
set_target_properties(souschef PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(souschef PRIVATE ${Fp16Source_DIR}/include)
target_include_directories(souschef PUBLIC include)
target_link_libraries(souschef PUBLIC libprotobuf)
diff --git a/compiler/souschef/include/souschef/Data/Explicit.h b/compiler/souschef/include/souschef/Data/Explicit.h
index 7cbb773da..434d0ec2c 100644
--- a/compiler/souschef/include/souschef/Data/Explicit.h
+++ b/compiler/souschef/include/souschef/Data/Explicit.h
@@ -96,6 +96,41 @@ template <typename T> struct ExplicitDataChefFactory : public DataChefFactory
}
};
+class ExplicitFloat16DataChef final : public DataChef
+{
+public:
+ ExplicitFloat16DataChef()
+ {
+ // DO NOTHING
+ }
+
+public:
+ std::vector<uint8_t> generate(int32_t count) const override;
+
+public:
+ void insert(const float &value) { _values.emplace_back(value); }
+
+private:
+ // NOTE store values in float but will convert to uint16_t in generate()
+ std::vector<float> _values;
+};
+
+struct ExplicitFloat16DataChefFactory : public DataChefFactory
+{
+ std::unique_ptr<DataChef> create(const Arguments &args) const
+ {
+ std::unique_ptr<ExplicitFloat16DataChef> res{new ExplicitFloat16DataChef};
+
+ for (uint32_t n = 0; n < args.count(); ++n)
+ {
+ auto const value = to_number<float>(args.value(n));
+ res->insert(value);
+ }
+
+ return std::move(res);
+ }
+};
+
} // namespace souschef
#endif // __SOUSCHEF_DATA_EXPLICIT_H__
diff --git a/compiler/souschef/include/souschef/Data/Gaussian.h b/compiler/souschef/include/souschef/Data/Gaussian.h
index 8093b4c41..c9ac571f9 100644
--- a/compiler/souschef/include/souschef/Data/Gaussian.h
+++ b/compiler/souschef/include/souschef/Data/Gaussian.h
@@ -41,6 +41,22 @@ private:
float _stddev;
};
+class GaussianFloat16DataChef final : public DataChef
+{
+public:
+ GaussianFloat16DataChef(float mean, float stddev) : _mean{mean}, _stddev{stddev}
+ {
+ // DO NOTHING
+ }
+
+public:
+ std::vector<uint8_t> generate(int32_t count) const override;
+
+private:
+ float _mean;
+ float _stddev;
+};
+
class GaussianInt32DataChef final : public DataChef
{
public:
@@ -109,6 +125,11 @@ struct GaussianUint8DataChefFactory : public DataChefFactory
std::unique_ptr<DataChef> create(const Arguments &args) const;
};
+struct GaussianFloat16DataChefFactory : public DataChefFactory
+{
+ std::unique_ptr<DataChef> create(const Arguments &args) const;
+};
+
} // namespace souschef
#endif // __SOUSCHEF_DATA_GAUSSIAN_H__
diff --git a/compiler/souschef/src/Explicit.cpp b/compiler/souschef/src/Explicit.cpp
index eb36cb7c3..3278ae3c3 100644
--- a/compiler/souschef/src/Explicit.cpp
+++ b/compiler/souschef/src/Explicit.cpp
@@ -19,6 +19,8 @@
#include <string>
#include <vector>
+#include <fp16.h>
+
namespace souschef
{
@@ -74,4 +76,23 @@ void ExplicitDataChef<std::string>::write_value(std::vector<uint8_t> &res, int32
}
}
+std::vector<uint8_t> ExplicitFloat16DataChef::generate(int32_t count) const
+{
+ std::vector<uint8_t> res;
+
+ for (uint32_t n = 0; n < count; ++n)
+ {
+ float const fvalue = (n < _values.size()) ? _values.at(n) : 0.0;
+ uint16_t const value = fp16_ieee_from_fp32_value(fvalue);
+ auto const arr = reinterpret_cast<const uint8_t *>(&value);
+
+ for (uint32_t b = 0; b < sizeof(uint16_t); ++b)
+ {
+ res.emplace_back(arr[b]);
+ }
+ }
+
+ return res;
+}
+
} // namespace souschef
diff --git a/compiler/souschef/src/Gaussian.cpp b/compiler/souschef/src/Gaussian.cpp
index 32cbcff4d..53a62cabf 100644
--- a/compiler/souschef/src/Gaussian.cpp
+++ b/compiler/souschef/src/Gaussian.cpp
@@ -23,6 +23,8 @@
#include <cassert>
#include <stdexcept>
+#include <fp16.h>
+
namespace souschef
{
@@ -36,7 +38,7 @@ static std::vector<uint8_t> generate_gaussian(int32_t count, float mean, float s
std::vector<uint8_t> res;
constexpr float max_cap = std::numeric_limits<T>::max();
- constexpr float min_cap = std::numeric_limits<T>::min();
+ constexpr float min_cap = std::numeric_limits<T>::lowest();
for (uint32_t n = 0; n < count; ++n)
{
float raw_value = dist(rand);
@@ -69,6 +71,34 @@ std::vector<uint8_t> GaussianFloat32DataChef::generate(int32_t count) const
return generate_gaussian<float>(count, _mean, _stddev);
}
+std::vector<uint8_t> GaussianFloat16DataChef::generate(int32_t count) const
+{
+ auto time_stamp = std::chrono::system_clock::now().time_since_epoch().count();
+ auto seed = static_cast<std::minstd_rand::result_type>(time_stamp);
+
+ std::minstd_rand rand{static_cast<std::minstd_rand::result_type>(seed)};
+ std::normal_distribution<float> dist{_mean, _stddev};
+
+ std::vector<uint8_t> res;
+
+ constexpr float max_cap = 1e9;
+ constexpr float min_cap = -1e9;
+ for (uint32_t n = 0; n < count; ++n)
+ {
+ float raw_value = dist(rand);
+ const float capped_value = std::max(min_cap, std::min(max_cap, raw_value));
+ const uint16_t value = fp16_ieee_from_fp32_value(capped_value);
+ auto const arr = reinterpret_cast<const uint8_t *>(&value);
+
+ for (uint32_t b = 0; b < sizeof(uint16_t); ++b)
+ {
+ res.emplace_back(arr[b]);
+ }
+ }
+
+ return res;
+}
+
std::vector<uint8_t> GaussianInt32DataChef::generate(int32_t count) const
{
return generate_gaussian<int32_t>(count, _mean, _stddev);
@@ -136,4 +166,17 @@ std::unique_ptr<DataChef> GaussianUint8DataChefFactory::create(const Arguments &
return std::unique_ptr<DataChef>{new GaussianUint8DataChef{mean, stddev}};
}
+std::unique_ptr<DataChef> GaussianFloat16DataChefFactory::create(const Arguments &args) const
+{
+ if (args.count() != 2)
+ {
+ throw std::runtime_error{"invalid argument count: two arguments (mean/stddev) are expected"};
+ }
+
+ auto const mean = to_number<float>(args.value(0));
+ auto const stddev = to_number<float>(args.value(1));
+
+ return std::unique_ptr<DataChef>{new GaussianFloat16DataChef{mean, stddev}};
+}
+
} // namespace souschef
diff --git a/compiler/tf2circle-conversion-test/CMakeLists.txt b/compiler/tf2circle-conversion-test/CMakeLists.txt
index 27f2463f3..79a39873b 100644
--- a/compiler/tf2circle-conversion-test/CMakeLists.txt
+++ b/compiler/tf2circle-conversion-test/CMakeLists.txt
@@ -128,6 +128,10 @@ list(APPEND TEST_DEPS "${TEST_CONFIG}")
# This "tf2circle_conversion_test_deps" target enforces CMake to generate all the dependencies during "build" phase
add_custom_target(tf2circle_conversion_test_deps ALL DEPENDS ${TEST_DEPS})
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
# Run tests
add_test(
NAME tf2circle_conversion_test
diff --git a/compiler/tf2circle-dredd-pb-test/CMakeLists.txt b/compiler/tf2circle-dredd-pb-test/CMakeLists.txt
index 48b098e24..83596fade 100644
--- a/compiler/tf2circle-dredd-pb-test/CMakeLists.txt
+++ b/compiler/tf2circle-dredd-pb-test/CMakeLists.txt
@@ -132,6 +132,10 @@ list(APPEND DEPS "${TARGET_RULE_LIB}")
# Generate dependencies
add_custom_target(tf2circle_dredd_pb_deps ALL DEPENDS ${DEPS})
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
add_test(
NAME tf2circle_dredd_pb_test
COMMAND
diff --git a/compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt b/compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt
index 789e58535..427e57502 100644
--- a/compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt
+++ b/compiler/tf2circle-dredd-pbtxt-test/CMakeLists.txt
@@ -175,6 +175,10 @@ list(APPEND DEPS "${TARGET_RULE_LIB}")
# Generate dependencies
add_custom_target(tf2circle_dredd_pbtxt_deps ALL DEPENDS ${DEPS})
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
add_test(
NAME tf2circle_dredd_pbtxt_test
COMMAND
diff --git a/compiler/tf2circle-model-test/CMakeLists.txt b/compiler/tf2circle-model-test/CMakeLists.txt
index 2fb82236a..ad776a62b 100644
--- a/compiler/tf2circle-model-test/CMakeLists.txt
+++ b/compiler/tf2circle-model-test/CMakeLists.txt
@@ -100,6 +100,10 @@ list(APPEND DEPS "${TEST_RUNNER_SCRIPT}")
### Generate dependencies
add_custom_target(tf2circle_model_test_deps ALL DEPENDS ${DEPS})
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
# NOTE This target is not built by default
add_test(
NAME tf2circle_model_test
diff --git a/compiler/tf2tflite-dredd-pb-test/CMakeLists.txt b/compiler/tf2tflite-dredd-pb-test/CMakeLists.txt
index b75c50772..ac9f14d70 100644
--- a/compiler/tf2tflite-dredd-pb-test/CMakeLists.txt
+++ b/compiler/tf2tflite-dredd-pb-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nnas_include(TargetRequire)
unset(REQUIRED_TARGETS)
diff --git a/compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt b/compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt
index 87cf7836f..95a296ef8 100644
--- a/compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt
+++ b/compiler/tf2tflite-dredd-pbtxt-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nnas_include(TargetRequire)
unset(REQUIRED_TARGETS)
diff --git a/compiler/tf2tflite-value-pb-test/CMakeLists.txt b/compiler/tf2tflite-value-pb-test/CMakeLists.txt
index 41974f72c..a6c451e0b 100644
--- a/compiler/tf2tflite-value-pb-test/CMakeLists.txt
+++ b/compiler/tf2tflite-value-pb-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nnas_include(TargetRequire)
unset(REQUIRED_TARGETS)
diff --git a/compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt b/compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt
index 2e76e21d3..fde3e60b4 100644
--- a/compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt
+++ b/compiler/tf2tflite-value-pbtxt-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nnas_include(TargetRequire)
unset(REQUIRED_TARGETS)
diff --git a/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt b/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt
index 0b4739374..97aa07fd3 100644
--- a/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt
+++ b/compiler/tf2tfliteV2-conversion-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nncc_find_resource(TensorFlowTests)
#
diff --git a/compiler/tf2tfliteV2/tf2tfliteV2.py b/compiler/tf2tfliteV2/tf2tfliteV2.py
index 6b578ad53..2bcf55328 100755
--- a/compiler/tf2tfliteV2/tf2tfliteV2.py
+++ b/compiler/tf2tfliteV2/tf2tfliteV2.py
@@ -110,6 +110,12 @@ def _get_parser():
type=str,
help="Names of the output arrays, comma-separated.")
+ # experimental options
+ parser.add_argument(
+ "--experimental_disable_batchmatmul_unfold",
+ action="store_true",
+ help="Experimental disable BatchMatMul unfold")
+
# Set default value
parser.set_defaults(model_format="graph_def")
return parser
@@ -228,6 +234,9 @@ def _v2_convert(flags):
keras_model = tf.keras.models.load_model(flags.input_path)
converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
+ if flags.experimental_disable_batchmatmul_unfold:
+ converter._experimental_disable_batchmatmul_unfold = True
+
converter.allow_custom_ops = True
converter.experimental_new_converter = True
diff --git a/compiler/tfl-inspect/CMakeLists.txt b/compiler/tfl-inspect/CMakeLists.txt
index 9e1cb720f..2c6e3a147 100644
--- a/compiler/tfl-inspect/CMakeLists.txt
+++ b/compiler/tfl-inspect/CMakeLists.txt
@@ -1,6 +1,6 @@
-if(NOT TARGET mio_tflite)
+if(NOT TARGET mio_tflite280)
return()
-endif(NOT TARGET mio_tflite)
+endif(NOT TARGET mio_tflite280)
set(DRIVER "driver/Driver.cpp")
diff --git a/compiler/tfl-inspect/driver/Driver.cpp b/compiler/tfl-inspect/driver/Driver.cpp
index 3e62e0ffb..8505ff4aa 100644
--- a/compiler/tfl-inspect/driver/Driver.cpp
+++ b/compiler/tfl-inspect/driver/Driver.cpp
@@ -35,7 +35,7 @@ int entry(int argc, char **argv)
.nargs(0)
.help("Dump Conv2D series weight operators in tflite file");
arser.add_argument("--op_version").nargs(0).help("Dump versions of the operators in tflite file");
- arser.add_argument("tflite").type(arser::DataType::STR).help("TFLite file to inspect");
+ arser.add_argument("tflite").help("TFLite file to inspect");
try
{
diff --git a/compiler/tfl-verify/CMakeLists.txt b/compiler/tfl-verify/CMakeLists.txt
index 2fba335ea..5bead5bb4 100644
--- a/compiler/tfl-verify/CMakeLists.txt
+++ b/compiler/tfl-verify/CMakeLists.txt
@@ -1,6 +1,6 @@
-if(NOT TARGET mio_tflite)
+if(NOT TARGET mio_tflite280)
return()
-endif(NOT TARGET mio_tflite)
+endif(NOT TARGET mio_tflite280)
file(GLOB_RECURSE SOURCES "src/*.cpp")
diff --git a/compiler/tfl-verify/src/Driver.cpp b/compiler/tfl-verify/src/Driver.cpp
index 6d1897607..62345494b 100644
--- a/compiler/tfl-verify/src/Driver.cpp
+++ b/compiler/tfl-verify/src/Driver.cpp
@@ -25,7 +25,7 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("tflite").type(arser::DataType::STR).help("TFLite file path to verify");
+ arser.add_argument("tflite").help("TFLite file path to verify");
try
{
diff --git a/compiler/tflchef/CMakeLists.txt b/compiler/tflchef/CMakeLists.txt
index 948b1cecd..6205ac650 100644
--- a/compiler/tflchef/CMakeLists.txt
+++ b/compiler/tflchef/CMakeLists.txt
@@ -20,4 +20,9 @@ add_subdirectory(core)
add_subdirectory(tflite)
# Tools
add_subdirectory(tools)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
add_subdirectory(tests)
diff --git a/compiler/tflchef/core/src/Convert.cpp b/compiler/tflchef/core/src/Convert.cpp
index 200c71eca..f4dd4b332 100644
--- a/compiler/tflchef/core/src/Convert.cpp
+++ b/compiler/tflchef/core/src/Convert.cpp
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -62,6 +63,8 @@ tflite::TensorType as_tflite_tensortype(const tflchef::TensorType &value)
{
case tflchef::FLOAT32:
return tflite::TensorType_FLOAT32;
+ case tflchef::FLOAT16:
+ return tflite::TensorType_FLOAT16;
case tflchef::INT32:
return tflite::TensorType_INT32;
case tflchef::UINT8:
@@ -164,3 +167,222 @@ as_tflite_sparse_index_vec(flatbuffers::FlatBufferBuilder &fb,
throw std::runtime_error("Unknown SparseIndexVector type");
}
+
+// namespace sparsity code referenced from
+// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/
+// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.cc
+
+namespace sparsity
+{
+
+template <typename T>
+FormatConverter<T>::FormatConverter(const std::vector<int> &shape,
+ const std::vector<int> &traversal_order,
+ const std::vector<TfLiteDimensionType> &format,
+ const std::vector<int> &block_size,
+ const std::vector<int> &block_map)
+ : dense_shape_(shape), traversal_order_(traversal_order), block_size_(block_size),
+ block_map_(block_map)
+{
+ dense_size_ = 1;
+ int block_dim = 0;
+ blocked_shape_.resize(shape.size());
+ format_.resize(shape.size() + block_map.size());
+ for (int i = 0; i < shape.size(); i++)
+ {
+ format_[i] = format[traversal_order[i]];
+ dense_size_ *= shape[i];
+ if (block_dim < block_map.size() && block_map[block_dim] == i)
+ {
+ blocked_shape_[i] = shape[i] / block_size[block_dim];
+ block_dim++;
+ }
+ else
+ {
+ blocked_shape_[i] = shape[i];
+ }
+ }
+
+ // Only dense blocks are supported.
+ for (int i = 0; i < block_map.size(); i++)
+ {
+ format_[i + shape.size()] = kTfLiteDimDense;
+ }
+}
+
+template <typename T> bool FormatConverter<T>::DenseToSparse(const T *src_data)
+{
+ int num_original_dims = dense_shape_.size();
+ int num_block_dims = block_map_.size();
+ int num_expanded_dims = num_original_dims + num_block_dims;
+ std::vector<int> expanded_shape(num_expanded_dims);
+ for (int i = 0; i < num_expanded_dims; i++)
+ {
+ if (i < num_original_dims)
+ {
+ expanded_shape[i] = blocked_shape_[i];
+ }
+ else
+ {
+ expanded_shape[i] = block_size_[i - num_original_dims];
+ }
+ }
+
+ std::vector<int> shape_offset(num_original_dims);
+ shape_offset[shape_offset.size() - 1] = 1;
+ for (int i = num_original_dims - 1; i > 0; --i)
+ {
+ shape_offset[i - 1] = shape_offset[i] * dense_shape_[i];
+ }
+
+ std::vector<int> expanded_shape_offset(num_expanded_dims);
+ for (int i = 0; i < num_original_dims; ++i)
+ {
+ expanded_shape_offset[i] = shape_offset[i];
+ }
+ for (int i = 0; i < num_block_dims; ++i)
+ {
+ int mapped_dim = block_map_[i];
+ expanded_shape_offset[num_original_dims + i] = shape_offset[mapped_dim];
+ expanded_shape_offset[mapped_dim] *= block_size_[i];
+ }
+
+ std::vector<int> dst_ordered_offset(num_expanded_dims);
+ for (int i = 0; i < num_expanded_dims; ++i)
+ {
+ dst_ordered_offset[i] = expanded_shape_offset[traversal_order_[i]];
+ }
+
+ std::vector<bool> dst_dim_has_nonzeroes(num_expanded_dims);
+ std::fill(dst_dim_has_nonzeroes.begin(), dst_dim_has_nonzeroes.end(), false);
+ std::vector<int> inner_compressed_dim(num_expanded_dims);
+ int most_recent_compressed_dim = -1;
+ std::vector<int> num_segments_of_next_compressed_dim(num_expanded_dims);
+ int segment_count = 1;
+ for (int i = num_expanded_dims - 1; i >= 0; --i)
+ {
+ inner_compressed_dim[i] = most_recent_compressed_dim;
+ if (format_[i] == kTfLiteDimSparseCSR)
+ {
+ most_recent_compressed_dim = i;
+ num_segments_of_next_compressed_dim[i] = segment_count;
+ segment_count = 1;
+ }
+ else
+ {
+ num_segments_of_next_compressed_dim[i] = -1;
+ segment_count *= expanded_shape[traversal_order_[i]];
+ }
+ }
+
+ dim_metadata_.resize(num_expanded_dims * 2);
+ std::vector<int> dst_sparse_dims;
+ dst_sparse_dims.reserve(num_expanded_dims);
+ for (int i = 0; i < num_expanded_dims; ++i)
+ {
+ dim_metadata_[i * 2].clear();
+ dim_metadata_[i * 2 + 1].clear();
+ if (format_[i] == kTfLiteDimDense)
+ {
+ // If dimension is dense, just store the shape.
+ dim_metadata_[i * 2].push_back(expanded_shape[traversal_order_[i]]);
+ }
+ else
+ {
+ dim_metadata_[i * 2].push_back(0); // Segment array always begins with 0.
+ dst_sparse_dims.push_back(i); // Add dimension to the sparse list.
+ }
+ }
+
+ // This algorithm assumes that the block size is small enough for all the
+ // elements to fit in cache, so the strided accesses from different traversal
+ // order and the write-first-erase-later strategy shouldn't be too slow
+ int dst_dim_idx = num_expanded_dims;
+ std::vector<int> coordinate(num_expanded_dims, 0);
+ int dense_tensor_idx = 0;
+ while (dst_dim_idx >= 0)
+ {
+ if (dst_dim_idx == num_expanded_dims)
+ {
+ // We have a complete coordinate. Add the element to the value array if it
+ // is not zero, or if the last dimension is dense.
+ if (!IsZero(src_data[dense_tensor_idx]))
+ {
+ data_.push_back(src_data[dense_tensor_idx]);
+ // Mark all sparse dimensions that their current indices have nonzeroes.
+ for (auto dst_dim : dst_sparse_dims)
+ {
+ if (!dst_dim_has_nonzeroes[dst_dim])
+ {
+ // Only add the index to the indices array if the current nonzero
+ // is the first nonzero of the block.
+ dim_metadata_[2 * dst_dim + 1].push_back(coordinate[dst_dim]);
+ dst_dim_has_nonzeroes[dst_dim] = true;
+ }
+ }
+ }
+ else if (format_[num_expanded_dims - 1] == kTfLiteDimDense)
+ {
+ data_.push_back(src_data[dense_tensor_idx]);
+ }
+ --dst_dim_idx;
+ }
+ else
+ {
+ int original_dim_idx = traversal_order_[dst_dim_idx];
+ int dim_size = expanded_shape[original_dim_idx];
+ if (dst_dim_has_nonzeroes[dst_dim_idx])
+ {
+ // If the previous block has nonzeroes, reset the flag to false since
+ // we have just moved to a new block.
+ dst_dim_has_nonzeroes[dst_dim_idx] = false;
+ }
+ else if (format_[dst_dim_idx] == kTfLiteDimSparseCSR)
+ {
+ // This block is empty. Delete unnecessary values if compressed.
+ int next_compressed_dim = inner_compressed_dim[dst_dim_idx];
+ int erase_offset = dim_metadata_[2 * dst_dim_idx + 1].size() *
+ num_segments_of_next_compressed_dim[dst_dim_idx];
+ if (next_compressed_dim >= 0)
+ {
+ auto &segments = dim_metadata_[2 * inner_compressed_dim[dst_dim_idx]];
+ segments.erase(segments.begin() + 1 + erase_offset, segments.end());
+ }
+ else
+ {
+ data_.erase(data_.begin() + erase_offset, data_.end());
+ }
+ }
+ if (++coordinate[dst_dim_idx] < dim_size)
+ {
+ // The current dst_dim_idx is valid (not out of bound).
+ dense_tensor_idx += dst_ordered_offset[dst_dim_idx];
+ ++dst_dim_idx;
+ }
+ else
+ {
+ // dst_dim_idx has reached its dim size. Update segment array and go
+ // back to incrementing the previous dimension (dst_dim_idx - 1).
+ if (format_[dst_dim_idx] == kTfLiteDimSparseCSR)
+ {
+ dim_metadata_[2 * dst_dim_idx].push_back(dim_metadata_[2 * dst_dim_idx + 1].size());
+ }
+ coordinate[dst_dim_idx] = -1;
+ dense_tensor_idx -= dst_ordered_offset[dst_dim_idx] * dim_size;
+ --dst_dim_idx;
+ }
+ }
+ }
+
+ return true;
+}
+
+template <typename T> bool FormatConverter<T>::IsZero(const T val)
+{
+ return (val == static_cast<T>(0));
+}
+
+template class FormatConverter<float>;
+template class FormatConverter<uint16_t>; // float16
+
+} // namespace sparsity
diff --git a/compiler/tflchef/core/src/Convert.h b/compiler/tflchef/core/src/Convert.h
index 45c93d229..6e910ea2c 100644
--- a/compiler/tflchef/core/src/Convert.h
+++ b/compiler/tflchef/core/src/Convert.h
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -34,4 +35,52 @@ flatbuffers::Offset<void>
as_tflite_sparse_index_vec(flatbuffers::FlatBufferBuilder &fb,
const ::tflchef::TensorSparsity_IndexVec &value);
+// codes under namespace sparsity referenced from
+// https://github.com/tensorflow/tensorflow/blob/3f878cff5b698b82eea85db2b60d65a2e320850e/
+// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.h
+// tensorflow/lite/kernels/internal/utils/sparsity_format_converter.cc
+
+namespace sparsity
+{
+
+// Storage format of each dimension in a sparse tensor.
+typedef enum TfLiteDimensionType
+{
+ kTfLiteDimDense = 0,
+ kTfLiteDimSparseCSR,
+} TfLiteDimensionType;
+
+template <typename T> class FormatConverter
+{
+public:
+ FormatConverter(const std::vector<int32_t> &shape, const std::vector<int32_t> &traversal_order,
+ const std::vector<TfLiteDimensionType> &format,
+ const std::vector<int32_t> &block_size = {},
+ const std::vector<int32_t> &block_map = {});
+
+ bool DenseToSparse(const T *src_data);
+
+ const std::vector<T> &GetData() { return data_; }
+ const std::vector<std::vector<int32_t>> &GetDimMetadata() { return dim_metadata_; }
+
+private:
+ bool IsZero(const T val);
+
+private:
+ std::vector<int32_t> dense_shape_;
+ std::vector<int32_t> blocked_shape_;
+ size_t dense_size_;
+ std::vector<int32_t> traversal_order_;
+ std::vector<TfLiteDimensionType> format_;
+ std::vector<int32_t> block_size_;
+ std::vector<int32_t> block_map_;
+ std::vector<std::vector<int32_t>> dim_metadata_;
+ std::vector<T> data_;
+};
+
+extern template class FormatConverter<float>;
+extern template class FormatConverter<uint16_t>; // float16
+
+} // namespace sparsity
+
#endif // __CONVERT_H__
diff --git a/compiler/tflchef/core/src/DataChef.def b/compiler/tflchef/core/src/DataChef.def
index c634c047e..28a5b7617 100644
--- a/compiler/tflchef/core/src/DataChef.def
+++ b/compiler/tflchef/core/src/DataChef.def
@@ -21,3 +21,7 @@ DATA_CHEF(FLOAT32, gaussian, GaussianFloat32DataChefFactory)
DATA_CHEF(INT32, gaussian, GaussianInt32DataChefFactory)
DATA_CHEF(INT16, gaussian, GaussianInt16DataChefFactory)
DATA_CHEF(UINT8, gaussian, GaussianUint8DataChefFactory)
+
+// FLOAT16 support for only gaussian, explicit for now
+DATA_CHEF(FLOAT16, explicit, ExplicitFloat16DataChefFactory)
+DATA_CHEF(FLOAT16, gaussian, GaussianFloat16DataChefFactory)
diff --git a/compiler/tflchef/core/src/ModelChef.cpp b/compiler/tflchef/core/src/ModelChef.cpp
index 93b9334a6..a788adc02 100644
--- a/compiler/tflchef/core/src/ModelChef.cpp
+++ b/compiler/tflchef/core/src/ModelChef.cpp
@@ -92,6 +92,7 @@ DataChefRegistry &data_chef_registry(const tflchef::TensorType &type)
static DataChefRegistry string;
static DataChefRegistry boolean;
static DataChefRegistry s16;
+ static DataChefRegistry fp16;
switch (type)
{
@@ -101,6 +102,8 @@ DataChefRegistry &data_chef_registry(const tflchef::TensorType &type)
return s64;
case tflchef::FLOAT32:
return fp32;
+ case tflchef::FLOAT16:
+ return fp16;
case tflchef::UINT8:
return u8;
case tflchef::STRING:
@@ -207,6 +210,41 @@ struct CookParams
std::string noname;
};
+std::vector<flatbuffers::Offset<tflite::DimensionMetadata>>
+make_dim_metadata_vec(flatbuffers::FlatBufferBuilder *flatbuffer_builder, int32_t dims_count,
+ const std::vector<int> &traversal_order_vec,
+ const std::vector<sparsity::TfLiteDimensionType> &format_vec,
+ const std::vector<std::vector<int32_t>> &dim_metadata_src)
+{
+ // Build sparsity parameter.
+ std::vector<flatbuffers::Offset<tflite::DimensionMetadata>> dim_metadata_vec(dims_count);
+ for (int32_t i = 0; i < dims_count; i++)
+ {
+ const int32_t metadata_idx = 2 * i;
+ if (format_vec[traversal_order_vec[i]] == sparsity::kTfLiteDimSparseCSR)
+ {
+ auto array_segments =
+ tflite::CreateInt32Vector(*flatbuffer_builder,
+ flatbuffer_builder->CreateVector(dim_metadata_src[metadata_idx]))
+ .Union();
+ auto array_indices =
+ tflite::CreateInt32Vector(
+ *flatbuffer_builder, flatbuffer_builder->CreateVector(dim_metadata_src[metadata_idx + 1]))
+ .Union();
+ dim_metadata_vec[i] =
+ tflite::CreateDimensionMetadata(*flatbuffer_builder, tflite::DimensionType_SPARSE_CSR, 0,
+ tflite::SparseIndexVector_Int32Vector, array_segments,
+ tflite::SparseIndexVector_Int32Vector, array_indices);
+ }
+ else
+ {
+ dim_metadata_vec[i] = tflite::CreateDimensionMetadata(
+ *flatbuffer_builder, tflite::DimensionType_DENSE, dim_metadata_src[metadata_idx][0]);
+ }
+ }
+ return dim_metadata_vec;
+}
+
template <typename T> std::map<std::string, int32_t> cook_graph(const T &graph, CookParams &cp)
{
LOGGER(l);
@@ -271,6 +309,8 @@ template <typename T> std::map<std::string, int32_t> cook_graph(const T &graph,
assert(operand.has_type());
+ flatbuffers::Offset<tflite::SparsityParameters> sparsity_index;
+
flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape;
std::vector<int32_t> dims;
if (operand.has_shape())
@@ -298,16 +338,125 @@ template <typename T> std::map<std::string, int32_t> cook_graph(const T &graph,
// Create Data
int32_t count = (element_count(dims) > 0) ? element_count(dims) : filler.arg_size();
auto data_vec = chef->generate(count);
- auto data = flatbuffer_builder->CreateVector(data_vec);
- // Create Buffer
- tflite::BufferBuilder buffer_builder{*flatbuffer_builder};
- buffer_builder.add_data(data);
- auto buffer = buffer_builder.Finish();
+ if (operand.has_make_sparse() && operand.make_sparse())
+ {
+ assert(not operand.has_sparsity());
+ assert(operand.has_shape());
+
+ const int32_t dims_count = dims.size();
+ std::vector<int> traversal_order_vec;
+ std::vector<sparsity::TfLiteDimensionType> format_vec;
+ for (int32_t o = 0; o < dims_count; ++o)
+ traversal_order_vec.push_back(o);
+ for (int32_t o = 0; o < dims_count - 1; ++o)
+ format_vec.push_back(sparsity::kTfLiteDimDense);
+ format_vec.push_back(sparsity::kTfLiteDimSparseCSR);
+
+ if (operand.type() == tflchef::FLOAT32)
+ {
+ ::sparsity::FormatConverter<float> converter(dims, traversal_order_vec, format_vec);
+ converter.DenseToSparse(reinterpret_cast<const float *>(data_vec.data()));
+ const auto &sparse_data = converter.GetData();
+
+ std::vector<uint8_t> sparse_uint8;
+ for (int c = 0; c < sparse_data.size(); ++c)
+ {
+ const float value = sparse_data.at(c);
+ const uint8_t *arr = reinterpret_cast<const uint8_t *>(&value);
+ for (uint32_t b = 0; b < sizeof(float); ++b)
+ {
+ sparse_uint8.emplace_back(arr[b]);
+ }
+ }
+ auto data = flatbuffer_builder->CreateVector(sparse_uint8);
+
+ // Create Buffer
+ tflite::BufferBuilder buffer_builder{*flatbuffer_builder};
+ buffer_builder.add_data(data);
+ auto buffer = buffer_builder.Finish();
+
+ // Update Buffer Index & Vector
+ buffer_index = buffer_vec.size();
+ buffer_vec.emplace_back(buffer);
+
+ // save SparsityParameters
+ auto traversal_order = flatbuffer_builder->CreateVector(traversal_order_vec);
+
+ // Create block map
+ std::vector<int> block_map_vec{};
+ auto block_map = flatbuffer_builder->CreateVector(block_map_vec);
+
+ // Create dimension metadata
+ const auto &dim_metadata_src = converter.GetDimMetadata();
+ auto dim_metadata_vec =
+ make_dim_metadata_vec(flatbuffer_builder.get(), dims_count, traversal_order_vec,
+ format_vec, dim_metadata_src);
+ auto dim_metadata = flatbuffer_builder->CreateVector(dim_metadata_vec);
+ sparsity_index = tflite::CreateSparsityParameters(*flatbuffer_builder, traversal_order,
+ block_map, dim_metadata);
+ }
+ else if (operand.type() == tflchef::FLOAT16)
+ {
+ ::sparsity::FormatConverter<uint16_t> converter(dims, traversal_order_vec, format_vec);
+ converter.DenseToSparse(reinterpret_cast<const uint16_t *>(data_vec.data()));
+ const auto &sparse_data = converter.GetData();
+
+ std::vector<uint8_t> sparse_uint8;
+ for (int c = 0; c < sparse_data.size(); ++c)
+ {
+ const uint16_t value = sparse_data.at(c);
+ const uint8_t *arr = reinterpret_cast<const uint8_t *>(&value);
+ for (uint32_t b = 0; b < sizeof(uint16_t); ++b)
+ {
+ sparse_uint8.emplace_back(arr[b]);
+ }
+ }
+ auto data = flatbuffer_builder->CreateVector(sparse_uint8);
+
+ // Create Buffer
+ tflite::BufferBuilder buffer_builder{*flatbuffer_builder};
+ buffer_builder.add_data(data);
+ auto buffer = buffer_builder.Finish();
+
+ // Update Buffer Index & Vector
+ buffer_index = buffer_vec.size();
+ buffer_vec.emplace_back(buffer);
+
+ // save SparsityParameters
+ auto traversal_order = flatbuffer_builder->CreateVector(traversal_order_vec);
+
+ // Create block map
+ std::vector<int> block_map_vec{};
+ auto block_map = flatbuffer_builder->CreateVector(block_map_vec);
+
+ // Create dimension metadata
+ const auto &dim_metadata_src = converter.GetDimMetadata();
+ auto dim_metadata_vec =
+ make_dim_metadata_vec(flatbuffer_builder.get(), dims_count, traversal_order_vec,
+ format_vec, dim_metadata_src);
+ auto dim_metadata = flatbuffer_builder->CreateVector(dim_metadata_vec);
+ sparsity_index = tflite::CreateSparsityParameters(*flatbuffer_builder, traversal_order,
+ block_map, dim_metadata);
+ }
+ else
+ {
+ throw std::runtime_error{"NYI: unsupported operand type"};
+ }
+ }
+ else
+ {
+ auto data = flatbuffer_builder->CreateVector(data_vec);
+
+ // Create Buffer
+ tflite::BufferBuilder buffer_builder{*flatbuffer_builder};
+ buffer_builder.add_data(data);
+ auto buffer = buffer_builder.Finish();
- // Update Buffer Index & Vector
- buffer_index = buffer_vec.size();
- buffer_vec.emplace_back(buffer);
+ // Update Buffer Index & Vector
+ buffer_index = buffer_vec.size();
+ buffer_vec.emplace_back(buffer);
+ }
}
else
{
@@ -384,8 +533,6 @@ template <typename T> std::map<std::string, int32_t> cook_graph(const T &graph,
quant_index = quant_builder.Finish();
}
- flatbuffers::Offset<tflite::SparsityParameters> sparsity_index;
-
if (operand.has_sparsity())
{
const auto &sparsity = operand.sparsity();
diff --git a/compiler/tflchef/core/src/Op/Densify.cpp b/compiler/tflchef/core/src/Op/Densify.cpp
new file mode 100644
index 000000000..63c4e207a
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Densify.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Densify.h"
+
+flatbuffers::Offset<void> DensifyChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+ tflite::DensifyOptionsBuilder options_builder{fbb};
+
+ return options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> DensifyChefFactory::create(const tflchef::Operation *operation) const
+{
+ return std::unique_ptr<OpChef>{new DensifyChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/Densify.h b/compiler/tflchef/core/src/Op/Densify.h
new file mode 100644
index 000000000..f6af693d9
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/Densify.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_DENSIFY_H__
+#define __OP_DENSIFY_H__
+
+#include "OpChef.h"
+
+class DensifyChef final : public OpChef
+{
+public:
+ explicit DensifyChef(const tflchef::Operation *operation) : _operation{operation}
+ {
+ // DO NOTHING
+ }
+
+public:
+ tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_DENSIFY; }
+
+ tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_DensifyOptions; }
+
+ flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+ const tflchef::Operation *_operation;
+};
+
+struct DensifyChefFactory final : public OpChefFactory
+{
+ std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_DENSIFY_H__
diff --git a/compiler/tflchef/core/src/OpChef.def b/compiler/tflchef/core/src/OpChef.def
index beebd359f..c19d00dfb 100644
--- a/compiler/tflchef/core/src/OpChef.def
+++ b/compiler/tflchef/core/src/OpChef.def
@@ -18,6 +18,7 @@ OP_CHEF(Ceil, CeilChefFactory)
OP_CHEF(Concatenation, ConcatenationChefFactory)
OP_CHEF(Conv2D, Conv2DChefFactory)
OP_CHEF(Cos, CosChefFactory)
+OP_CHEF(Densify, DensifyChefFactory)
OP_CHEF(DepthToSpace, DepthToSpaceChefFactory)
OP_CHEF(DepthwiseConv2D, DepthwiseConv2DChefFactory)
OP_CHEF(Dequantize, DequantizeChefFactory)
diff --git a/compiler/tflchef/core/src/OpChefs.h b/compiler/tflchef/core/src/OpChefs.h
index 159019abf..3cd3be558 100644
--- a/compiler/tflchef/core/src/OpChefs.h
+++ b/compiler/tflchef/core/src/OpChefs.h
@@ -31,6 +31,7 @@
#include "Op/Concatenation.h"
#include "Op/Conv2D.h"
#include "Op/Cos.h"
+#include "Op/Densify.h"
#include "Op/DepthToSpace.h"
#include "Op/DepthwiseConv2D.h"
#include "Op/Dequantize.h"
diff --git a/compiler/tflchef/proto/tflchef.proto b/compiler/tflchef/proto/tflchef.proto
index 1abefafe1..da4b6920d 100644
--- a/compiler/tflchef/proto/tflchef.proto
+++ b/compiler/tflchef/proto/tflchef.proto
@@ -15,6 +15,7 @@ package tflchef;
// This enum value corresponds to TensorType in TensorFlow Lite schema
enum TensorType {
FLOAT32 = 0;
+ FLOAT16 = 1;
INT32 = 2;
UINT8 = 3;
INT64 = 4;
@@ -88,6 +89,12 @@ message Operand {
optional TensorSparsity sparsity = 6;
optional bool is_variable = 7 [default = false];
optional ShapeSignature shape_signature = 8;
+ // 'make_sparse' is to tell tflchef to make a sparse tensor
+ // as filling 'TensorSparsity' by hand can be difficult
+ // for now, last dimension will be SPARSE_CSR
+ // ex) shape [2, 3, 4] will have
+ // TraversalOrder [0, 1, 2] with [DENSE, DENSE, SPARSE_CSR]
+ optional bool make_sparse = 9 [default = false];
}
// This enum value corresponds to Padding in TensorFlow Lite schema
@@ -534,6 +541,10 @@ message FakeQuantOptions {
optional bool narrow_range = 4 [default = false];
}
+message DensifyOptions {
+ // NONE
+}
+
message Operation {
optional string type = 1;
repeated string input = 2;
@@ -650,6 +661,7 @@ message Operation {
optional AddNOptions add_n_options = 207;
optional MatMulOptions matmul_options = 208;
optional MaxPoolWithArgmaxOptions max_pool_with_argmax_options = 209;
+ optional DensifyOptions densify_options = 210;
// NOTE if there are more than two options with same type of Options
// use the number not listed in the above reserve list
}
diff --git a/compiler/tflchef/tests/make_sparse/test.recipe b/compiler/tflchef/tests/make_sparse/test.recipe
new file mode 100644
index 000000000..15cc93a5d
--- /dev/null
+++ b/compiler/tflchef/tests/make_sparse/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "sparse"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "2" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "3"
+ }
+ make_sparse: true
+}
+operand {
+ name: "dense"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operation {
+ type: "Densify"
+ input: "sparse"
+ output: "dense"
+}
+operation {
+ type: "Add"
+ input: "in"
+ input: "dense"
+ output: "out"
+ add_options {
+ activation: NONE
+ }
+}
+input: "in"
+output: "out"
diff --git a/compiler/tflchef/tests/make_sparse_f16/test.recipe b/compiler/tflchef/tests/make_sparse_f16/test.recipe
new file mode 100644
index 000000000..5977a1d32
--- /dev/null
+++ b/compiler/tflchef/tests/make_sparse_f16/test.recipe
@@ -0,0 +1,54 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "sparse16"
+ type: FLOAT16
+ shape { dim: 4 dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "2" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "3"
+ }
+ make_sparse: true
+}
+operand {
+ name: "dense16"
+ type: FLOAT16
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "dense32"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operation {
+ type: "Densify"
+ input: "sparse16"
+ output: "dense16"
+}
+operation {
+ type: "Dequantize"
+ input: "dense16"
+ output: "dense32"
+}
+operation {
+ type: "Add"
+ input: "in"
+ input: "dense32"
+ output: "out"
+ add_options {
+ activation: NONE
+ }
+}
+input: "in"
+output: "out"
diff --git a/compiler/tflchef/tflite/CMakeLists.txt b/compiler/tflchef/tflite/CMakeLists.txt
index 3c3352b0a..d9a20a2e1 100644
--- a/compiler/tflchef/tflite/CMakeLists.txt
+++ b/compiler/tflchef/tflite/CMakeLists.txt
@@ -3,6 +3,7 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
add_library(tflchef_tflite STATIC ${SOURCES})
target_include_directories(tflchef_tflite PUBLIC include)
target_include_directories(tflchef_tflite PRIVATE src)
+target_include_directories(tflchef_tflite PRIVATE src/Op/include)
target_link_libraries(tflchef_tflite tflchef_proto)
target_link_libraries(tflchef_tflite mio_tflite280)
target_link_libraries(tflchef_tflite mio_tflite280_helper)
diff --git a/compiler/tflchef/tflite/src/Convert.cpp b/compiler/tflchef/tflite/src/Convert.cpp
index f47e51d3d..242987661 100644
--- a/compiler/tflchef/tflite/src/Convert.cpp
+++ b/compiler/tflchef/tflite/src/Convert.cpp
@@ -35,8 +35,9 @@ tflchef::TensorType as_tflchef_type(const tflite::TensorType type)
return tflchef::BOOL;
case tflite::TensorType_INT16:
return tflchef::INT16;
+ case tflite::TensorType_FLOAT16:
+ return tflchef::FLOAT16;
// TODO handle other types
- // TensorType_FLOAT16
// TensorType_STRING
// TensorType_COMPLEX64
default:
diff --git a/compiler/tflchef/tflite/src/FillerHelper.cpp b/compiler/tflchef/tflite/src/FillerHelper.cpp
index cf96d2e8c..1ac99ad40 100644
--- a/compiler/tflchef/tflite/src/FillerHelper.cpp
+++ b/compiler/tflchef/tflite/src/FillerHelper.cpp
@@ -48,3 +48,18 @@ void fill_tensor_to_import(int32_t idx, TFliteImport *import)
}
} // namespace tflchef
+
+// helpers of common code for filling inputs
+namespace tflchef
+{
+
+void fill_two_inputs(const tflite::Operator *op, TFliteImport *import)
+{
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+ assert(inputs.size() == 2);
+
+ fill_tensor_to_import(inputs[0], import);
+ fill_tensor_to_import(inputs[1], import);
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/FillerHelper.h b/compiler/tflchef/tflite/src/FillerHelper.h
index 053a5c18a..e96ae73d0 100644
--- a/compiler/tflchef/tflite/src/FillerHelper.h
+++ b/compiler/tflchef/tflite/src/FillerHelper.h
@@ -28,4 +28,12 @@ void fill_tensor_to_import(int32_t idx, TFliteImport *import);
} // namespace tflchef
+// helpers of common code for filling inputs
+namespace tflchef
+{
+
+void fill_two_inputs(const tflite::Operator *op, TFliteImport *import);
+
+} // namespace tflchef
+
#endif // __FILLER_HELPER_H__
diff --git a/compiler/tflchef/tflite/src/Op/Add.cpp b/compiler/tflchef/tflite/src/Op/Add.cpp
index 3e880a63b..23d360616 100644
--- a/compiler/tflchef/tflite/src/Op/Add.cpp
+++ b/compiler/tflchef/tflite/src/Op/Add.cpp
@@ -27,11 +27,7 @@ void TFliteOpAdd::filler(const tflite::Operator *op, TFliteImport *import,
{
// Add may have constant input
- const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
- assert(inputs.size() == 2);
-
- fill_tensor_to_import(inputs[0], import);
- fill_tensor_to_import(inputs[1], import);
+ fill_two_inputs(op, import);
}
tflchef::Operation *TFliteOpAdd::build(const tflite::Operator *op, TFliteImport *import,
diff --git a/compiler/tflchef/tflite/src/Op/Maximum.cpp b/compiler/tflchef/tflite/src/Op/Maximum.cpp
index d52caf0c2..65e4c2c99 100644
--- a/compiler/tflchef/tflite/src/Op/Maximum.cpp
+++ b/compiler/tflchef/tflite/src/Op/Maximum.cpp
@@ -25,11 +25,7 @@ namespace tflchef
void TFliteOpMaximum::filler(const tflite::Operator *op, TFliteImport *import,
tflchef::ModelRecipe *model_recipe) const
{
- const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
- assert(inputs.size() == 2);
-
- fill_tensor_to_import(inputs[0], import);
- fill_tensor_to_import(inputs[1], import);
+ fill_two_inputs(op, import);
}
tflchef::Operation *TFliteOpMaximum::build(const tflite::Operator *op, TFliteImport *import,
diff --git a/compiler/tflchef/tflite/src/Op/Minimum.cpp b/compiler/tflchef/tflite/src/Op/Minimum.cpp
index 6440f1deb..b4d255ce3 100644
--- a/compiler/tflchef/tflite/src/Op/Minimum.cpp
+++ b/compiler/tflchef/tflite/src/Op/Minimum.cpp
@@ -25,11 +25,7 @@ namespace tflchef
void TFliteOpMinimum::filler(const tflite::Operator *op, TFliteImport *import,
tflchef::ModelRecipe *model_recipe) const
{
- const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
- assert(inputs.size() == 2);
-
- fill_tensor_to_import(inputs[0], import);
- fill_tensor_to_import(inputs[1], import);
+ fill_two_inputs(op, import);
}
tflchef::Operation *TFliteOpMinimum::build(const tflite::Operator *op, TFliteImport *import,
diff --git a/compiler/tflchef/tflite/src/Op/Mul.cpp b/compiler/tflchef/tflite/src/Op/Mul.cpp
index 9faa4acaf..1145ff7e6 100644
--- a/compiler/tflchef/tflite/src/Op/Mul.cpp
+++ b/compiler/tflchef/tflite/src/Op/Mul.cpp
@@ -27,11 +27,7 @@ void TFliteOpMul::filler(const tflite::Operator *op, TFliteImport *import,
{
// Mul may have constant input
- const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
- assert(inputs.size() == 2);
-
- fill_tensor_to_import(inputs[0], import);
- fill_tensor_to_import(inputs[1], import);
+ fill_two_inputs(op, import);
}
tflchef::Operation *TFliteOpMul::build(const tflite::Operator *op, TFliteImport *import,
diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.cpp b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.cpp
index ad9921970..4f096ced4 100644
--- a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.cpp
+++ b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.cpp
@@ -38,7 +38,7 @@ void TFliteOpNonMaxSuppressionV4::filler(const tflite::Operator *op, TFliteImpor
for (int32_t index = 2; index < 5; ++index)
{
- fill_tensor_to_import(index, import);
+ fill_tensor_to_import(inputs[index], import);
}
}
diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp
index db7f4c932..332cba0ff 100644
--- a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp
+++ b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp
@@ -41,7 +41,7 @@ void TFliteOpNonMaxSuppressionV5::filler(const tflite::Operator *op, TFliteImpor
for (int32_t index = 2; index < 6; ++index)
{
- fill_tensor_to_import(index, import);
+ fill_tensor_to_import(inputs[index], import);
}
}
diff --git a/compiler/tflchef/tflite/src/Op/PadV2.cpp b/compiler/tflchef/tflite/src/Op/PadV2.cpp
index 0b1c9f3b2..a6b657f59 100644
--- a/compiler/tflchef/tflite/src/Op/PadV2.cpp
+++ b/compiler/tflchef/tflite/src/Op/PadV2.cpp
@@ -16,6 +16,7 @@
#include "PadV2.h"
+#include "Convert.h"
#include "FillerHelper.h"
namespace tflchef
@@ -24,9 +25,11 @@ namespace tflchef
void TFliteOpPadV2::filler(const tflite::Operator *op, TFliteImport *import,
tflchef::ModelRecipe *model_recipe) const
{
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+
// Filler for paddings and constant_values
- fill_tensor_to_import(1, import);
- fill_tensor_to_import(2, import);
+ fill_tensor_to_import(inputs[1], import);
+ fill_tensor_to_import(inputs[2], import);
}
tflchef::Operation *TFliteOpPadV2::build(const tflite::Operator *op, TFliteImport *import,
diff --git a/compiler/tflchef/tflite/src/Op/ScatterNd.cpp b/compiler/tflchef/tflite/src/Op/ScatterNd.cpp
index 548a09a67..ec09a69a4 100644
--- a/compiler/tflchef/tflite/src/Op/ScatterNd.cpp
+++ b/compiler/tflchef/tflite/src/Op/ScatterNd.cpp
@@ -25,9 +25,11 @@ namespace tflchef
void TFliteOpScatterNd::filler(const tflite::Operator *op, TFliteImport *import,
tflchef::ModelRecipe *model_recipe) const
{
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+
// Filler for indices and shape
- fill_tensor_to_import(0, import);
- fill_tensor_to_import(2, import);
+ fill_tensor_to_import(inputs[0], import);
+ fill_tensor_to_import(inputs[2], import);
}
tflchef::Operation *TFliteOpScatterNd::build(const tflite::Operator *, TFliteImport *,
diff --git a/compiler/tflchef/tflite/src/Op/SegmentSum.cpp b/compiler/tflchef/tflite/src/Op/SegmentSum.cpp
index a975ca4b3..bc45a94e0 100644
--- a/compiler/tflchef/tflite/src/Op/SegmentSum.cpp
+++ b/compiler/tflchef/tflite/src/Op/SegmentSum.cpp
@@ -16,6 +16,7 @@
#include "SegmentSum.h"
+#include "Convert.h"
#include "FillerHelper.h"
namespace tflchef
@@ -24,8 +25,10 @@ namespace tflchef
void TFliteOpSegmentSum::filler(const tflite::Operator *op, TFliteImport *import,
tflchef::ModelRecipe *model_recipe) const
{
- // Filler for indices and shape
- fill_tensor_to_import(1, import);
+ const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+
+ // Filler for segment_ids
+ fill_tensor_to_import(inputs[1], import);
}
tflchef::Operation *TFliteOpSegmentSum::build(const tflite::Operator *op, TFliteImport *import,
diff --git a/compiler/tflchef/tflite/src/Op/Sub.cpp b/compiler/tflchef/tflite/src/Op/Sub.cpp
index 0a08bbfdf..584be0ab9 100644
--- a/compiler/tflchef/tflite/src/Op/Sub.cpp
+++ b/compiler/tflchef/tflite/src/Op/Sub.cpp
@@ -27,11 +27,7 @@ void TFliteOpSub::filler(const tflite::Operator *op, TFliteImport *import,
{
// Sub may have constant input
- const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
- assert(inputs.size() == 2);
-
- fill_tensor_to_import(inputs[0], import);
- fill_tensor_to_import(inputs[1], import);
+ fill_two_inputs(op, import);
}
tflchef::Operation *TFliteOpSub::build(const tflite::Operator *op, TFliteImport *import,
diff --git a/compiler/tflchef/tflite/src/Op/Abs.h b/compiler/tflchef/tflite/src/Op/include/Abs.h
index d99b0d593..d99b0d593 100644
--- a/compiler/tflchef/tflite/src/Op/Abs.h
+++ b/compiler/tflchef/tflite/src/Op/include/Abs.h
diff --git a/compiler/tflchef/tflite/src/Op/Add.h b/compiler/tflchef/tflite/src/Op/include/Add.h
index 49d945f8b..49d945f8b 100644
--- a/compiler/tflchef/tflite/src/Op/Add.h
+++ b/compiler/tflchef/tflite/src/Op/include/Add.h
diff --git a/compiler/tflchef/tflite/src/Op/AddN.h b/compiler/tflchef/tflite/src/Op/include/AddN.h
index 4387aa06a..4387aa06a 100644
--- a/compiler/tflchef/tflite/src/Op/AddN.h
+++ b/compiler/tflchef/tflite/src/Op/include/AddN.h
diff --git a/compiler/tflchef/tflite/src/Op/ArgMax.h b/compiler/tflchef/tflite/src/Op/include/ArgMax.h
index 30068ecf2..30068ecf2 100644
--- a/compiler/tflchef/tflite/src/Op/ArgMax.h
+++ b/compiler/tflchef/tflite/src/Op/include/ArgMax.h
diff --git a/compiler/tflchef/tflite/src/Op/ArgMin.h b/compiler/tflchef/tflite/src/Op/include/ArgMin.h
index 83c643c1a..83c643c1a 100644
--- a/compiler/tflchef/tflite/src/Op/ArgMin.h
+++ b/compiler/tflchef/tflite/src/Op/include/ArgMin.h
diff --git a/compiler/tflchef/tflite/src/Op/AveragePool2D.h b/compiler/tflchef/tflite/src/Op/include/AveragePool2D.h
index f9e9fb254..f9e9fb254 100644
--- a/compiler/tflchef/tflite/src/Op/AveragePool2D.h
+++ b/compiler/tflchef/tflite/src/Op/include/AveragePool2D.h
diff --git a/compiler/tflchef/tflite/src/Op/BatchMatMul.h b/compiler/tflchef/tflite/src/Op/include/BatchMatMul.h
index 6eb4c6e68..6eb4c6e68 100644
--- a/compiler/tflchef/tflite/src/Op/BatchMatMul.h
+++ b/compiler/tflchef/tflite/src/Op/include/BatchMatMul.h
diff --git a/compiler/tflchef/tflite/src/Op/BatchToSpaceND.h b/compiler/tflchef/tflite/src/Op/include/BatchToSpaceND.h
index ae2114c97..ae2114c97 100644
--- a/compiler/tflchef/tflite/src/Op/BatchToSpaceND.h
+++ b/compiler/tflchef/tflite/src/Op/include/BatchToSpaceND.h
diff --git a/compiler/tflchef/tflite/src/Op/BidirectionalSequenceLSTM.h b/compiler/tflchef/tflite/src/Op/include/BidirectionalSequenceLSTM.h
index 333f542ac..333f542ac 100644
--- a/compiler/tflchef/tflite/src/Op/BidirectionalSequenceLSTM.h
+++ b/compiler/tflchef/tflite/src/Op/include/BidirectionalSequenceLSTM.h
diff --git a/compiler/tflchef/tflite/src/Op/Cast.h b/compiler/tflchef/tflite/src/Op/include/Cast.h
index 29c126c93..29c126c93 100644
--- a/compiler/tflchef/tflite/src/Op/Cast.h
+++ b/compiler/tflchef/tflite/src/Op/include/Cast.h
diff --git a/compiler/tflchef/tflite/src/Op/Ceil.h b/compiler/tflchef/tflite/src/Op/include/Ceil.h
index 44df20778..44df20778 100644
--- a/compiler/tflchef/tflite/src/Op/Ceil.h
+++ b/compiler/tflchef/tflite/src/Op/include/Ceil.h
diff --git a/compiler/tflchef/tflite/src/Op/Concatenation.h b/compiler/tflchef/tflite/src/Op/include/Concatenation.h
index 4a7ea5791..4a7ea5791 100644
--- a/compiler/tflchef/tflite/src/Op/Concatenation.h
+++ b/compiler/tflchef/tflite/src/Op/include/Concatenation.h
diff --git a/compiler/tflchef/tflite/src/Op/Conv2D.h b/compiler/tflchef/tflite/src/Op/include/Conv2D.h
index 0216e9ce9..0216e9ce9 100644
--- a/compiler/tflchef/tflite/src/Op/Conv2D.h
+++ b/compiler/tflchef/tflite/src/Op/include/Conv2D.h
diff --git a/compiler/tflchef/tflite/src/Op/Cos.h b/compiler/tflchef/tflite/src/Op/include/Cos.h
index 8f3dbe3a6..8f3dbe3a6 100644
--- a/compiler/tflchef/tflite/src/Op/Cos.h
+++ b/compiler/tflchef/tflite/src/Op/include/Cos.h
diff --git a/compiler/tflchef/tflite/src/Op/DepthToSpace.h b/compiler/tflchef/tflite/src/Op/include/DepthToSpace.h
index b5852ac89..b5852ac89 100644
--- a/compiler/tflchef/tflite/src/Op/DepthToSpace.h
+++ b/compiler/tflchef/tflite/src/Op/include/DepthToSpace.h
diff --git a/compiler/tflchef/tflite/src/Op/DepthwiseConv2D.h b/compiler/tflchef/tflite/src/Op/include/DepthwiseConv2D.h
index c172536b4..c172536b4 100644
--- a/compiler/tflchef/tflite/src/Op/DepthwiseConv2D.h
+++ b/compiler/tflchef/tflite/src/Op/include/DepthwiseConv2D.h
diff --git a/compiler/tflchef/tflite/src/Op/Dequantize.h b/compiler/tflchef/tflite/src/Op/include/Dequantize.h
index df1c7bbdb..df1c7bbdb 100644
--- a/compiler/tflchef/tflite/src/Op/Dequantize.h
+++ b/compiler/tflchef/tflite/src/Op/include/Dequantize.h
diff --git a/compiler/tflchef/tflite/src/Op/Div.h b/compiler/tflchef/tflite/src/Op/include/Div.h
index 254a4cd99..254a4cd99 100644
--- a/compiler/tflchef/tflite/src/Op/Div.h
+++ b/compiler/tflchef/tflite/src/Op/include/Div.h
diff --git a/compiler/tflchef/tflite/src/Op/ELU.h b/compiler/tflchef/tflite/src/Op/include/ELU.h
index 490c9fde4..490c9fde4 100644
--- a/compiler/tflchef/tflite/src/Op/ELU.h
+++ b/compiler/tflchef/tflite/src/Op/include/ELU.h
diff --git a/compiler/tflchef/tflite/src/Op/Equal.h b/compiler/tflchef/tflite/src/Op/include/Equal.h
index fd4b40001..fd4b40001 100644
--- a/compiler/tflchef/tflite/src/Op/Equal.h
+++ b/compiler/tflchef/tflite/src/Op/include/Equal.h
diff --git a/compiler/tflchef/tflite/src/Op/Exp.h b/compiler/tflchef/tflite/src/Op/include/Exp.h
index 5ff3ddc8b..5ff3ddc8b 100644
--- a/compiler/tflchef/tflite/src/Op/Exp.h
+++ b/compiler/tflchef/tflite/src/Op/include/Exp.h
diff --git a/compiler/tflchef/tflite/src/Op/ExpandDims.h b/compiler/tflchef/tflite/src/Op/include/ExpandDims.h
index e2f3e4e50..e2f3e4e50 100644
--- a/compiler/tflchef/tflite/src/Op/ExpandDims.h
+++ b/compiler/tflchef/tflite/src/Op/include/ExpandDims.h
diff --git a/compiler/tflchef/tflite/src/Op/FakeQuant.h b/compiler/tflchef/tflite/src/Op/include/FakeQuant.h
index f36e615df..f36e615df 100644
--- a/compiler/tflchef/tflite/src/Op/FakeQuant.h
+++ b/compiler/tflchef/tflite/src/Op/include/FakeQuant.h
diff --git a/compiler/tflchef/tflite/src/Op/Fill.h b/compiler/tflchef/tflite/src/Op/include/Fill.h
index 4f46f628a..4f46f628a 100644
--- a/compiler/tflchef/tflite/src/Op/Fill.h
+++ b/compiler/tflchef/tflite/src/Op/include/Fill.h
diff --git a/compiler/tflchef/tflite/src/Op/Floor.h b/compiler/tflchef/tflite/src/Op/include/Floor.h
index f0f8ef38a..f0f8ef38a 100644
--- a/compiler/tflchef/tflite/src/Op/Floor.h
+++ b/compiler/tflchef/tflite/src/Op/include/Floor.h
diff --git a/compiler/tflchef/tflite/src/Op/FloorDiv.h b/compiler/tflchef/tflite/src/Op/include/FloorDiv.h
index 5d049a668..5d049a668 100644
--- a/compiler/tflchef/tflite/src/Op/FloorDiv.h
+++ b/compiler/tflchef/tflite/src/Op/include/FloorDiv.h
diff --git a/compiler/tflchef/tflite/src/Op/FloorMod.h b/compiler/tflchef/tflite/src/Op/include/FloorMod.h
index f36dfe813..f36dfe813 100644
--- a/compiler/tflchef/tflite/src/Op/FloorMod.h
+++ b/compiler/tflchef/tflite/src/Op/include/FloorMod.h
diff --git a/compiler/tflchef/tflite/src/Op/FullyConnected.h b/compiler/tflchef/tflite/src/Op/include/FullyConnected.h
index 8fbe1f3ed..8fbe1f3ed 100644
--- a/compiler/tflchef/tflite/src/Op/FullyConnected.h
+++ b/compiler/tflchef/tflite/src/Op/include/FullyConnected.h
diff --git a/compiler/tflchef/tflite/src/Op/Gather.h b/compiler/tflchef/tflite/src/Op/include/Gather.h
index e01276b76..e01276b76 100644
--- a/compiler/tflchef/tflite/src/Op/Gather.h
+++ b/compiler/tflchef/tflite/src/Op/include/Gather.h
diff --git a/compiler/tflchef/tflite/src/Op/GatherNd.h b/compiler/tflchef/tflite/src/Op/include/GatherNd.h
index 112f23d33..112f23d33 100644
--- a/compiler/tflchef/tflite/src/Op/GatherNd.h
+++ b/compiler/tflchef/tflite/src/Op/include/GatherNd.h
diff --git a/compiler/tflchef/tflite/src/Op/Greater.h b/compiler/tflchef/tflite/src/Op/include/Greater.h
index 3ab2d1a4e..3ab2d1a4e 100644
--- a/compiler/tflchef/tflite/src/Op/Greater.h
+++ b/compiler/tflchef/tflite/src/Op/include/Greater.h
diff --git a/compiler/tflchef/tflite/src/Op/GreaterEqual.h b/compiler/tflchef/tflite/src/Op/include/GreaterEqual.h
index 96b0af78a..96b0af78a 100644
--- a/compiler/tflchef/tflite/src/Op/GreaterEqual.h
+++ b/compiler/tflchef/tflite/src/Op/include/GreaterEqual.h
diff --git a/compiler/tflchef/tflite/src/Op/L2Normalize.h b/compiler/tflchef/tflite/src/Op/include/L2Normalize.h
index a73eae6c8..a73eae6c8 100644
--- a/compiler/tflchef/tflite/src/Op/L2Normalize.h
+++ b/compiler/tflchef/tflite/src/Op/include/L2Normalize.h
diff --git a/compiler/tflchef/tflite/src/Op/L2Pool2D.h b/compiler/tflchef/tflite/src/Op/include/L2Pool2D.h
index 046353440..046353440 100644
--- a/compiler/tflchef/tflite/src/Op/L2Pool2D.h
+++ b/compiler/tflchef/tflite/src/Op/include/L2Pool2D.h
diff --git a/compiler/tflchef/tflite/src/Op/LeakyRelu.h b/compiler/tflchef/tflite/src/Op/include/LeakyRelu.h
index 28e63e0ca..28e63e0ca 100644
--- a/compiler/tflchef/tflite/src/Op/LeakyRelu.h
+++ b/compiler/tflchef/tflite/src/Op/include/LeakyRelu.h
diff --git a/compiler/tflchef/tflite/src/Op/Less.h b/compiler/tflchef/tflite/src/Op/include/Less.h
index 1316cb613..1316cb613 100644
--- a/compiler/tflchef/tflite/src/Op/Less.h
+++ b/compiler/tflchef/tflite/src/Op/include/Less.h
diff --git a/compiler/tflchef/tflite/src/Op/LessEqual.h b/compiler/tflchef/tflite/src/Op/include/LessEqual.h
index 81c710fbc..81c710fbc 100644
--- a/compiler/tflchef/tflite/src/Op/LessEqual.h
+++ b/compiler/tflchef/tflite/src/Op/include/LessEqual.h
diff --git a/compiler/tflchef/tflite/src/Op/LocalResponseNormalization.h b/compiler/tflchef/tflite/src/Op/include/LocalResponseNormalization.h
index c0eb3f2b1..c0eb3f2b1 100644
--- a/compiler/tflchef/tflite/src/Op/LocalResponseNormalization.h
+++ b/compiler/tflchef/tflite/src/Op/include/LocalResponseNormalization.h
diff --git a/compiler/tflchef/tflite/src/Op/Log.h b/compiler/tflchef/tflite/src/Op/include/Log.h
index 9d17e2f81..9d17e2f81 100644
--- a/compiler/tflchef/tflite/src/Op/Log.h
+++ b/compiler/tflchef/tflite/src/Op/include/Log.h
diff --git a/compiler/tflchef/tflite/src/Op/LogSoftmax.h b/compiler/tflchef/tflite/src/Op/include/LogSoftmax.h
index efd81f3e9..efd81f3e9 100644
--- a/compiler/tflchef/tflite/src/Op/LogSoftmax.h
+++ b/compiler/tflchef/tflite/src/Op/include/LogSoftmax.h
diff --git a/compiler/tflchef/tflite/src/Op/LogicalAnd.h b/compiler/tflchef/tflite/src/Op/include/LogicalAnd.h
index 1f7a964b9..1f7a964b9 100644
--- a/compiler/tflchef/tflite/src/Op/LogicalAnd.h
+++ b/compiler/tflchef/tflite/src/Op/include/LogicalAnd.h
diff --git a/compiler/tflchef/tflite/src/Op/LogicalNot.h b/compiler/tflchef/tflite/src/Op/include/LogicalNot.h
index b75d33554..b75d33554 100644
--- a/compiler/tflchef/tflite/src/Op/LogicalNot.h
+++ b/compiler/tflchef/tflite/src/Op/include/LogicalNot.h
diff --git a/compiler/tflchef/tflite/src/Op/LogicalOr.h b/compiler/tflchef/tflite/src/Op/include/LogicalOr.h
index 5331a0d65..5331a0d65 100644
--- a/compiler/tflchef/tflite/src/Op/LogicalOr.h
+++ b/compiler/tflchef/tflite/src/Op/include/LogicalOr.h
diff --git a/compiler/tflchef/tflite/src/Op/Logistic.h b/compiler/tflchef/tflite/src/Op/include/Logistic.h
index a75bf490e..a75bf490e 100644
--- a/compiler/tflchef/tflite/src/Op/Logistic.h
+++ b/compiler/tflchef/tflite/src/Op/include/Logistic.h
diff --git a/compiler/tflchef/tflite/src/Op/MatrixDiag.h b/compiler/tflchef/tflite/src/Op/include/MatrixDiag.h
index 4074f2c36..4074f2c36 100644
--- a/compiler/tflchef/tflite/src/Op/MatrixDiag.h
+++ b/compiler/tflchef/tflite/src/Op/include/MatrixDiag.h
diff --git a/compiler/tflchef/tflite/src/Op/MatrixSetDiag.h b/compiler/tflchef/tflite/src/Op/include/MatrixSetDiag.h
index 0e7ec7f32..0e7ec7f32 100644
--- a/compiler/tflchef/tflite/src/Op/MatrixSetDiag.h
+++ b/compiler/tflchef/tflite/src/Op/include/MatrixSetDiag.h
diff --git a/compiler/tflchef/tflite/src/Op/MaxPool2D.h b/compiler/tflchef/tflite/src/Op/include/MaxPool2D.h
index 36533f80c..36533f80c 100644
--- a/compiler/tflchef/tflite/src/Op/MaxPool2D.h
+++ b/compiler/tflchef/tflite/src/Op/include/MaxPool2D.h
diff --git a/compiler/tflchef/tflite/src/Op/Maximum.h b/compiler/tflchef/tflite/src/Op/include/Maximum.h
index acafec343..acafec343 100644
--- a/compiler/tflchef/tflite/src/Op/Maximum.h
+++ b/compiler/tflchef/tflite/src/Op/include/Maximum.h
diff --git a/compiler/tflchef/tflite/src/Op/Mean.h b/compiler/tflchef/tflite/src/Op/include/Mean.h
index 532c40c66..532c40c66 100644
--- a/compiler/tflchef/tflite/src/Op/Mean.h
+++ b/compiler/tflchef/tflite/src/Op/include/Mean.h
diff --git a/compiler/tflchef/tflite/src/Op/Minimum.h b/compiler/tflchef/tflite/src/Op/include/Minimum.h
index 5db5b7940..5db5b7940 100644
--- a/compiler/tflchef/tflite/src/Op/Minimum.h
+++ b/compiler/tflchef/tflite/src/Op/include/Minimum.h
diff --git a/compiler/tflchef/tflite/src/Op/MirrorPad.h b/compiler/tflchef/tflite/src/Op/include/MirrorPad.h
index c9acdd498..c9acdd498 100644
--- a/compiler/tflchef/tflite/src/Op/MirrorPad.h
+++ b/compiler/tflchef/tflite/src/Op/include/MirrorPad.h
diff --git a/compiler/tflchef/tflite/src/Op/Mul.h b/compiler/tflchef/tflite/src/Op/include/Mul.h
index fd009d2fd..fd009d2fd 100644
--- a/compiler/tflchef/tflite/src/Op/Mul.h
+++ b/compiler/tflchef/tflite/src/Op/include/Mul.h
diff --git a/compiler/tflchef/tflite/src/Op/Neg.h b/compiler/tflchef/tflite/src/Op/include/Neg.h
index c77ab7e84..c77ab7e84 100644
--- a/compiler/tflchef/tflite/src/Op/Neg.h
+++ b/compiler/tflchef/tflite/src/Op/include/Neg.h
diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.h b/compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV4.h
index 114a2ad2f..114a2ad2f 100644
--- a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV4.h
+++ b/compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV4.h
diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h b/compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV5.h
index c948043f4..c948043f4 100644
--- a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h
+++ b/compiler/tflchef/tflite/src/Op/include/NonMaxSuppressionV5.h
diff --git a/compiler/tflchef/tflite/src/Op/NotEqual.h b/compiler/tflchef/tflite/src/Op/include/NotEqual.h
index b1febdcc5..b1febdcc5 100644
--- a/compiler/tflchef/tflite/src/Op/NotEqual.h
+++ b/compiler/tflchef/tflite/src/Op/include/NotEqual.h
diff --git a/compiler/tflchef/tflite/src/Op/OneHot.h b/compiler/tflchef/tflite/src/Op/include/OneHot.h
index 50bbed095..50bbed095 100644
--- a/compiler/tflchef/tflite/src/Op/OneHot.h
+++ b/compiler/tflchef/tflite/src/Op/include/OneHot.h
diff --git a/compiler/tflchef/tflite/src/Op/PRelu.h b/compiler/tflchef/tflite/src/Op/include/PRelu.h
index b35c6e7ce..b35c6e7ce 100644
--- a/compiler/tflchef/tflite/src/Op/PRelu.h
+++ b/compiler/tflchef/tflite/src/Op/include/PRelu.h
diff --git a/compiler/tflchef/tflite/src/Op/Pack.h b/compiler/tflchef/tflite/src/Op/include/Pack.h
index 7779f64ed..7779f64ed 100644
--- a/compiler/tflchef/tflite/src/Op/Pack.h
+++ b/compiler/tflchef/tflite/src/Op/include/Pack.h
diff --git a/compiler/tflchef/tflite/src/Op/Pad.h b/compiler/tflchef/tflite/src/Op/include/Pad.h
index 99998d418..99998d418 100644
--- a/compiler/tflchef/tflite/src/Op/Pad.h
+++ b/compiler/tflchef/tflite/src/Op/include/Pad.h
diff --git a/compiler/tflchef/tflite/src/Op/PadV2.h b/compiler/tflchef/tflite/src/Op/include/PadV2.h
index 3aa474b92..3aa474b92 100644
--- a/compiler/tflchef/tflite/src/Op/PadV2.h
+++ b/compiler/tflchef/tflite/src/Op/include/PadV2.h
diff --git a/compiler/tflchef/tflite/src/Op/Pow.h b/compiler/tflchef/tflite/src/Op/include/Pow.h
index 20e847377..20e847377 100644
--- a/compiler/tflchef/tflite/src/Op/Pow.h
+++ b/compiler/tflchef/tflite/src/Op/include/Pow.h
diff --git a/compiler/tflchef/tflite/src/Op/Quantize.h b/compiler/tflchef/tflite/src/Op/include/Quantize.h
index 256ed5a5c..256ed5a5c 100644
--- a/compiler/tflchef/tflite/src/Op/Quantize.h
+++ b/compiler/tflchef/tflite/src/Op/include/Quantize.h
diff --git a/compiler/tflchef/tflite/src/Op/Range.h b/compiler/tflchef/tflite/src/Op/include/Range.h
index ad10dc58b..ad10dc58b 100644
--- a/compiler/tflchef/tflite/src/Op/Range.h
+++ b/compiler/tflchef/tflite/src/Op/include/Range.h
diff --git a/compiler/tflchef/tflite/src/Op/Rank.h b/compiler/tflchef/tflite/src/Op/include/Rank.h
index 003d9d310..003d9d310 100644
--- a/compiler/tflchef/tflite/src/Op/Rank.h
+++ b/compiler/tflchef/tflite/src/Op/include/Rank.h
diff --git a/compiler/tflchef/tflite/src/Op/ReLU.h b/compiler/tflchef/tflite/src/Op/include/ReLU.h
index be1090270..be1090270 100644
--- a/compiler/tflchef/tflite/src/Op/ReLU.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReLU.h
diff --git a/compiler/tflchef/tflite/src/Op/ReLU6.h b/compiler/tflchef/tflite/src/Op/include/ReLU6.h
index 64ddb6a2e..64ddb6a2e 100644
--- a/compiler/tflchef/tflite/src/Op/ReLU6.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReLU6.h
diff --git a/compiler/tflchef/tflite/src/Op/ReLUN1To1.h b/compiler/tflchef/tflite/src/Op/include/ReLUN1To1.h
index 0767006af..0767006af 100644
--- a/compiler/tflchef/tflite/src/Op/ReLUN1To1.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReLUN1To1.h
diff --git a/compiler/tflchef/tflite/src/Op/ReduceAny.h b/compiler/tflchef/tflite/src/Op/include/ReduceAny.h
index dd5e361d5..dd5e361d5 100644
--- a/compiler/tflchef/tflite/src/Op/ReduceAny.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReduceAny.h
diff --git a/compiler/tflchef/tflite/src/Op/ReduceMax.h b/compiler/tflchef/tflite/src/Op/include/ReduceMax.h
index 8e65cf47c..8e65cf47c 100644
--- a/compiler/tflchef/tflite/src/Op/ReduceMax.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReduceMax.h
diff --git a/compiler/tflchef/tflite/src/Op/ReduceMin.h b/compiler/tflchef/tflite/src/Op/include/ReduceMin.h
index 88cba6fe7..88cba6fe7 100644
--- a/compiler/tflchef/tflite/src/Op/ReduceMin.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReduceMin.h
diff --git a/compiler/tflchef/tflite/src/Op/ReduceProd.h b/compiler/tflchef/tflite/src/Op/include/ReduceProd.h
index e7766840a..e7766840a 100644
--- a/compiler/tflchef/tflite/src/Op/ReduceProd.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReduceProd.h
diff --git a/compiler/tflchef/tflite/src/Op/Reshape.h b/compiler/tflchef/tflite/src/Op/include/Reshape.h
index be9fdac08..be9fdac08 100644
--- a/compiler/tflchef/tflite/src/Op/Reshape.h
+++ b/compiler/tflchef/tflite/src/Op/include/Reshape.h
diff --git a/compiler/tflchef/tflite/src/Op/ResizeBilinear.h b/compiler/tflchef/tflite/src/Op/include/ResizeBilinear.h
index 98c49c534..98c49c534 100644
--- a/compiler/tflchef/tflite/src/Op/ResizeBilinear.h
+++ b/compiler/tflchef/tflite/src/Op/include/ResizeBilinear.h
diff --git a/compiler/tflchef/tflite/src/Op/ResizeNearestNeighbor.h b/compiler/tflchef/tflite/src/Op/include/ResizeNearestNeighbor.h
index 5090bb938..5090bb938 100644
--- a/compiler/tflchef/tflite/src/Op/ResizeNearestNeighbor.h
+++ b/compiler/tflchef/tflite/src/Op/include/ResizeNearestNeighbor.h
diff --git a/compiler/tflchef/tflite/src/Op/ReverseSequence.h b/compiler/tflchef/tflite/src/Op/include/ReverseSequence.h
index 8c8c811e4..8c8c811e4 100644
--- a/compiler/tflchef/tflite/src/Op/ReverseSequence.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReverseSequence.h
diff --git a/compiler/tflchef/tflite/src/Op/ReverseV2.h b/compiler/tflchef/tflite/src/Op/include/ReverseV2.h
index 6a8a75e6b..6a8a75e6b 100644
--- a/compiler/tflchef/tflite/src/Op/ReverseV2.h
+++ b/compiler/tflchef/tflite/src/Op/include/ReverseV2.h
diff --git a/compiler/tflchef/tflite/src/Op/Round.h b/compiler/tflchef/tflite/src/Op/include/Round.h
index df0da3fa1..df0da3fa1 100644
--- a/compiler/tflchef/tflite/src/Op/Round.h
+++ b/compiler/tflchef/tflite/src/Op/include/Round.h
diff --git a/compiler/tflchef/tflite/src/Op/Rsqrt.h b/compiler/tflchef/tflite/src/Op/include/Rsqrt.h
index 5d68344c2..5d68344c2 100644
--- a/compiler/tflchef/tflite/src/Op/Rsqrt.h
+++ b/compiler/tflchef/tflite/src/Op/include/Rsqrt.h
diff --git a/compiler/tflchef/tflite/src/Op/SVDF.h b/compiler/tflchef/tflite/src/Op/include/SVDF.h
index a59ca54a2..a59ca54a2 100644
--- a/compiler/tflchef/tflite/src/Op/SVDF.h
+++ b/compiler/tflchef/tflite/src/Op/include/SVDF.h
diff --git a/compiler/tflchef/tflite/src/Op/ScatterNd.h b/compiler/tflchef/tflite/src/Op/include/ScatterNd.h
index 76362d775..76362d775 100644
--- a/compiler/tflchef/tflite/src/Op/ScatterNd.h
+++ b/compiler/tflchef/tflite/src/Op/include/ScatterNd.h
diff --git a/compiler/tflchef/tflite/src/Op/SegmentSum.h b/compiler/tflchef/tflite/src/Op/include/SegmentSum.h
index d20e63bd7..d20e63bd7 100644
--- a/compiler/tflchef/tflite/src/Op/SegmentSum.h
+++ b/compiler/tflchef/tflite/src/Op/include/SegmentSum.h
diff --git a/compiler/tflchef/tflite/src/Op/Select.h b/compiler/tflchef/tflite/src/Op/include/Select.h
index bf8e57d78..bf8e57d78 100644
--- a/compiler/tflchef/tflite/src/Op/Select.h
+++ b/compiler/tflchef/tflite/src/Op/include/Select.h
diff --git a/compiler/tflchef/tflite/src/Op/SelectV2.h b/compiler/tflchef/tflite/src/Op/include/SelectV2.h
index ff03341d7..ff03341d7 100644
--- a/compiler/tflchef/tflite/src/Op/SelectV2.h
+++ b/compiler/tflchef/tflite/src/Op/include/SelectV2.h
diff --git a/compiler/tflchef/tflite/src/Op/Shape.h b/compiler/tflchef/tflite/src/Op/include/Shape.h
index ebe1befb3..ebe1befb3 100644
--- a/compiler/tflchef/tflite/src/Op/Shape.h
+++ b/compiler/tflchef/tflite/src/Op/include/Shape.h
diff --git a/compiler/tflchef/tflite/src/Op/Sin.h b/compiler/tflchef/tflite/src/Op/include/Sin.h
index 51eabceb5..51eabceb5 100644
--- a/compiler/tflchef/tflite/src/Op/Sin.h
+++ b/compiler/tflchef/tflite/src/Op/include/Sin.h
diff --git a/compiler/tflchef/tflite/src/Op/Slice.h b/compiler/tflchef/tflite/src/Op/include/Slice.h
index 6ca6724d3..6ca6724d3 100644
--- a/compiler/tflchef/tflite/src/Op/Slice.h
+++ b/compiler/tflchef/tflite/src/Op/include/Slice.h
diff --git a/compiler/tflchef/tflite/src/Op/Softmax.h b/compiler/tflchef/tflite/src/Op/include/Softmax.h
index cf168bdd9..cf168bdd9 100644
--- a/compiler/tflchef/tflite/src/Op/Softmax.h
+++ b/compiler/tflchef/tflite/src/Op/include/Softmax.h
diff --git a/compiler/tflchef/tflite/src/Op/SpaceToBatchND.h b/compiler/tflchef/tflite/src/Op/include/SpaceToBatchND.h
index 9d7bc44e8..9d7bc44e8 100644
--- a/compiler/tflchef/tflite/src/Op/SpaceToBatchND.h
+++ b/compiler/tflchef/tflite/src/Op/include/SpaceToBatchND.h
diff --git a/compiler/tflchef/tflite/src/Op/SpaceToDepth.h b/compiler/tflchef/tflite/src/Op/include/SpaceToDepth.h
index 784ad940a..784ad940a 100644
--- a/compiler/tflchef/tflite/src/Op/SpaceToDepth.h
+++ b/compiler/tflchef/tflite/src/Op/include/SpaceToDepth.h
diff --git a/compiler/tflchef/tflite/src/Op/SparseToDense.h b/compiler/tflchef/tflite/src/Op/include/SparseToDense.h
index 5ffe4789d..5ffe4789d 100644
--- a/compiler/tflchef/tflite/src/Op/SparseToDense.h
+++ b/compiler/tflchef/tflite/src/Op/include/SparseToDense.h
diff --git a/compiler/tflchef/tflite/src/Op/Split.h b/compiler/tflchef/tflite/src/Op/include/Split.h
index af247a1b9..af247a1b9 100644
--- a/compiler/tflchef/tflite/src/Op/Split.h
+++ b/compiler/tflchef/tflite/src/Op/include/Split.h
diff --git a/compiler/tflchef/tflite/src/Op/SplitV.h b/compiler/tflchef/tflite/src/Op/include/SplitV.h
index 3f715b5f9..3f715b5f9 100644
--- a/compiler/tflchef/tflite/src/Op/SplitV.h
+++ b/compiler/tflchef/tflite/src/Op/include/SplitV.h
diff --git a/compiler/tflchef/tflite/src/Op/Sqrt.h b/compiler/tflchef/tflite/src/Op/include/Sqrt.h
index 9f0ad04ae..9f0ad04ae 100644
--- a/compiler/tflchef/tflite/src/Op/Sqrt.h
+++ b/compiler/tflchef/tflite/src/Op/include/Sqrt.h
diff --git a/compiler/tflchef/tflite/src/Op/Square.h b/compiler/tflchef/tflite/src/Op/include/Square.h
index 9c008fe52..9c008fe52 100644
--- a/compiler/tflchef/tflite/src/Op/Square.h
+++ b/compiler/tflchef/tflite/src/Op/include/Square.h
diff --git a/compiler/tflchef/tflite/src/Op/SquaredDifference.h b/compiler/tflchef/tflite/src/Op/include/SquaredDifference.h
index 58c2ed460..58c2ed460 100644
--- a/compiler/tflchef/tflite/src/Op/SquaredDifference.h
+++ b/compiler/tflchef/tflite/src/Op/include/SquaredDifference.h
diff --git a/compiler/tflchef/tflite/src/Op/Squeeze.h b/compiler/tflchef/tflite/src/Op/include/Squeeze.h
index b6c89f73d..b6c89f73d 100644
--- a/compiler/tflchef/tflite/src/Op/Squeeze.h
+++ b/compiler/tflchef/tflite/src/Op/include/Squeeze.h
diff --git a/compiler/tflchef/tflite/src/Op/StridedSlice.h b/compiler/tflchef/tflite/src/Op/include/StridedSlice.h
index 98054b9b9..98054b9b9 100644
--- a/compiler/tflchef/tflite/src/Op/StridedSlice.h
+++ b/compiler/tflchef/tflite/src/Op/include/StridedSlice.h
diff --git a/compiler/tflchef/tflite/src/Op/Sub.h b/compiler/tflchef/tflite/src/Op/include/Sub.h
index 2168e5e0d..2168e5e0d 100644
--- a/compiler/tflchef/tflite/src/Op/Sub.h
+++ b/compiler/tflchef/tflite/src/Op/include/Sub.h
diff --git a/compiler/tflchef/tflite/src/Op/Sum.h b/compiler/tflchef/tflite/src/Op/include/Sum.h
index 38eeb080d..38eeb080d 100644
--- a/compiler/tflchef/tflite/src/Op/Sum.h
+++ b/compiler/tflchef/tflite/src/Op/include/Sum.h
diff --git a/compiler/tflchef/tflite/src/Op/Tanh.h b/compiler/tflchef/tflite/src/Op/include/Tanh.h
index 7339e4103..7339e4103 100644
--- a/compiler/tflchef/tflite/src/Op/Tanh.h
+++ b/compiler/tflchef/tflite/src/Op/include/Tanh.h
diff --git a/compiler/tflchef/tflite/src/Op/Tile.h b/compiler/tflchef/tflite/src/Op/include/Tile.h
index 640f52a1f..640f52a1f 100644
--- a/compiler/tflchef/tflite/src/Op/Tile.h
+++ b/compiler/tflchef/tflite/src/Op/include/Tile.h
diff --git a/compiler/tflchef/tflite/src/Op/TopKV2.h b/compiler/tflchef/tflite/src/Op/include/TopKV2.h
index b2b74cc75..b2b74cc75 100644
--- a/compiler/tflchef/tflite/src/Op/TopKV2.h
+++ b/compiler/tflchef/tflite/src/Op/include/TopKV2.h
diff --git a/compiler/tflchef/tflite/src/Op/Transpose.h b/compiler/tflchef/tflite/src/Op/include/Transpose.h
index f0d944b6b..f0d944b6b 100644
--- a/compiler/tflchef/tflite/src/Op/Transpose.h
+++ b/compiler/tflchef/tflite/src/Op/include/Transpose.h
diff --git a/compiler/tflchef/tflite/src/Op/TransposeConv.h b/compiler/tflchef/tflite/src/Op/include/TransposeConv.h
index c79cdabd2..c79cdabd2 100644
--- a/compiler/tflchef/tflite/src/Op/TransposeConv.h
+++ b/compiler/tflchef/tflite/src/Op/include/TransposeConv.h
diff --git a/compiler/tflchef/tflite/src/Op/UnidirectionalSequenceLSTM.h b/compiler/tflchef/tflite/src/Op/include/UnidirectionalSequenceLSTM.h
index cc4e5fb0f..cc4e5fb0f 100644
--- a/compiler/tflchef/tflite/src/Op/UnidirectionalSequenceLSTM.h
+++ b/compiler/tflchef/tflite/src/Op/include/UnidirectionalSequenceLSTM.h
diff --git a/compiler/tflchef/tflite/src/Op/Unique.h b/compiler/tflchef/tflite/src/Op/include/Unique.h
index fae037c9f..fae037c9f 100644
--- a/compiler/tflchef/tflite/src/Op/Unique.h
+++ b/compiler/tflchef/tflite/src/Op/include/Unique.h
diff --git a/compiler/tflchef/tflite/src/Op/Unpack.h b/compiler/tflchef/tflite/src/Op/include/Unpack.h
index 1036bdc14..1036bdc14 100644
--- a/compiler/tflchef/tflite/src/Op/Unpack.h
+++ b/compiler/tflchef/tflite/src/Op/include/Unpack.h
diff --git a/compiler/tflchef/tflite/src/Op/Where.h b/compiler/tflchef/tflite/src/Op/include/Where.h
index 00cdc4b00..00cdc4b00 100644
--- a/compiler/tflchef/tflite/src/Op/Where.h
+++ b/compiler/tflchef/tflite/src/Op/include/Where.h
diff --git a/compiler/tflchef/tflite/src/Op/ZerosLike.h b/compiler/tflchef/tflite/src/Op/include/ZerosLike.h
index 163c1fa21..163c1fa21 100644
--- a/compiler/tflchef/tflite/src/Op/ZerosLike.h
+++ b/compiler/tflchef/tflite/src/Op/include/ZerosLike.h
diff --git a/compiler/tflchef/tflite/src/TFliteOpChefs.h b/compiler/tflchef/tflite/src/TFliteOpChefs.h
index b38b35a61..1b9d420e5 100644
--- a/compiler/tflchef/tflite/src/TFliteOpChefs.h
+++ b/compiler/tflchef/tflite/src/TFliteOpChefs.h
@@ -18,115 +18,115 @@
#define __TFLITE_OP_CHEFS_H__
// In alphabet order
-#include "Op/Abs.h"
-#include "Op/Add.h"
-#include "Op/AddN.h"
-#include "Op/ArgMax.h"
-#include "Op/ArgMin.h"
-#include "Op/AveragePool2D.h"
-#include "Op/BatchMatMul.h"
-#include "Op/BatchToSpaceND.h"
-#include "Op/BidirectionalSequenceLSTM.h"
-#include "Op/Cast.h"
-#include "Op/Ceil.h"
-#include "Op/Concatenation.h"
-#include "Op/Conv2D.h"
-#include "Op/Cos.h"
-#include "Op/DepthToSpace.h"
-#include "Op/DepthwiseConv2D.h"
-#include "Op/Dequantize.h"
-#include "Op/Div.h"
-#include "Op/ELU.h"
-#include "Op/Equal.h"
-#include "Op/Exp.h"
-#include "Op/ExpandDims.h"
-#include "Op/FakeQuant.h"
-#include "Op/Fill.h"
-#include "Op/Floor.h"
-#include "Op/FloorDiv.h"
-#include "Op/FloorMod.h"
-#include "Op/FullyConnected.h"
-#include "Op/Gather.h"
-#include "Op/GatherNd.h"
-#include "Op/Greater.h"
-#include "Op/GreaterEqual.h"
-#include "Op/L2Normalize.h"
-#include "Op/L2Pool2D.h"
-#include "Op/LeakyRelu.h"
-#include "Op/Less.h"
-#include "Op/LessEqual.h"
-#include "Op/LocalResponseNormalization.h"
-#include "Op/Log.h"
-#include "Op/LogicalAnd.h"
-#include "Op/LogicalNot.h"
-#include "Op/LogicalOr.h"
-#include "Op/Logistic.h"
-#include "Op/LogSoftmax.h"
-#include "Op/MatrixDiag.h"
-#include "Op/MatrixSetDiag.h"
-#include "Op/Maximum.h"
-#include "Op/MaxPool2D.h"
-#include "Op/Mean.h"
-#include "Op/Minimum.h"
-#include "Op/MirrorPad.h"
-#include "Op/Mul.h"
-#include "Op/Neg.h"
-#include "Op/NonMaxSuppressionV4.h"
-#include "Op/NonMaxSuppressionV5.h"
-#include "Op/NotEqual.h"
-#include "Op/OneHot.h"
-#include "Op/Pack.h"
-#include "Op/Pad.h"
-#include "Op/PadV2.h"
-#include "Op/Pow.h"
-#include "Op/PRelu.h"
-#include "Op/Quantize.h"
-#include "Op/Range.h"
-#include "Op/Rank.h"
-#include "Op/ReduceAny.h"
-#include "Op/ReduceMax.h"
-#include "Op/ReduceMin.h"
-#include "Op/ReduceProd.h"
-#include "Op/ReLU.h"
-#include "Op/ReLU6.h"
-#include "Op/ReLUN1To1.h"
-#include "Op/Reshape.h"
-#include "Op/ResizeBilinear.h"
-#include "Op/ResizeNearestNeighbor.h"
-#include "Op/ReverseSequence.h"
-#include "Op/ReverseV2.h"
-#include "Op/Round.h"
-#include "Op/Rsqrt.h"
-#include "Op/ScatterNd.h"
-#include "Op/SegmentSum.h"
-#include "Op/Select.h"
-#include "Op/SelectV2.h"
-#include "Op/Shape.h"
-#include "Op/Sin.h"
-#include "Op/Slice.h"
-#include "Op/Softmax.h"
-#include "Op/SpaceToBatchND.h"
-#include "Op/SpaceToDepth.h"
-#include "Op/SparseToDense.h"
-#include "Op/Split.h"
-#include "Op/SplitV.h"
-#include "Op/Sqrt.h"
-#include "Op/Square.h"
-#include "Op/SquaredDifference.h"
-#include "Op/Squeeze.h"
-#include "Op/StridedSlice.h"
-#include "Op/Sub.h"
-#include "Op/Sum.h"
-#include "Op/SVDF.h"
-#include "Op/Tanh.h"
-#include "Op/Tile.h"
-#include "Op/TopKV2.h"
-#include "Op/Transpose.h"
-#include "Op/TransposeConv.h"
-#include "Op/UnidirectionalSequenceLSTM.h"
-#include "Op/Unique.h"
-#include "Op/Unpack.h"
-#include "Op/Where.h"
-#include "Op/ZerosLike.h"
+#include "Op/include/Abs.h"
+#include "Op/include/Add.h"
+#include "Op/include/AddN.h"
+#include "Op/include/ArgMax.h"
+#include "Op/include/ArgMin.h"
+#include "Op/include/AveragePool2D.h"
+#include "Op/include/BatchMatMul.h"
+#include "Op/include/BatchToSpaceND.h"
+#include "Op/include/BidirectionalSequenceLSTM.h"
+#include "Op/include/Cast.h"
+#include "Op/include/Ceil.h"
+#include "Op/include/Concatenation.h"
+#include "Op/include/Conv2D.h"
+#include "Op/include/Cos.h"
+#include "Op/include/DepthToSpace.h"
+#include "Op/include/DepthwiseConv2D.h"
+#include "Op/include/Dequantize.h"
+#include "Op/include/Div.h"
+#include "Op/include/ELU.h"
+#include "Op/include/Equal.h"
+#include "Op/include/Exp.h"
+#include "Op/include/ExpandDims.h"
+#include "Op/include/FakeQuant.h"
+#include "Op/include/Fill.h"
+#include "Op/include/Floor.h"
+#include "Op/include/FloorDiv.h"
+#include "Op/include/FloorMod.h"
+#include "Op/include/FullyConnected.h"
+#include "Op/include/Gather.h"
+#include "Op/include/GatherNd.h"
+#include "Op/include/Greater.h"
+#include "Op/include/GreaterEqual.h"
+#include "Op/include/L2Normalize.h"
+#include "Op/include/L2Pool2D.h"
+#include "Op/include/LeakyRelu.h"
+#include "Op/include/Less.h"
+#include "Op/include/LessEqual.h"
+#include "Op/include/LocalResponseNormalization.h"
+#include "Op/include/Log.h"
+#include "Op/include/LogicalAnd.h"
+#include "Op/include/LogicalNot.h"
+#include "Op/include/LogicalOr.h"
+#include "Op/include/Logistic.h"
+#include "Op/include/LogSoftmax.h"
+#include "Op/include/MatrixDiag.h"
+#include "Op/include/MatrixSetDiag.h"
+#include "Op/include/Maximum.h"
+#include "Op/include/MaxPool2D.h"
+#include "Op/include/Mean.h"
+#include "Op/include/Minimum.h"
+#include "Op/include/MirrorPad.h"
+#include "Op/include/Mul.h"
+#include "Op/include/Neg.h"
+#include "Op/include/NonMaxSuppressionV4.h"
+#include "Op/include/NonMaxSuppressionV5.h"
+#include "Op/include/NotEqual.h"
+#include "Op/include/OneHot.h"
+#include "Op/include/Pack.h"
+#include "Op/include/Pad.h"
+#include "Op/include/PadV2.h"
+#include "Op/include/Pow.h"
+#include "Op/include/PRelu.h"
+#include "Op/include/Quantize.h"
+#include "Op/include/Range.h"
+#include "Op/include/Rank.h"
+#include "Op/include/ReduceAny.h"
+#include "Op/include/ReduceMax.h"
+#include "Op/include/ReduceMin.h"
+#include "Op/include/ReduceProd.h"
+#include "Op/include/ReLU.h"
+#include "Op/include/ReLU6.h"
+#include "Op/include/ReLUN1To1.h"
+#include "Op/include/Reshape.h"
+#include "Op/include/ResizeBilinear.h"
+#include "Op/include/ResizeNearestNeighbor.h"
+#include "Op/include/ReverseSequence.h"
+#include "Op/include/ReverseV2.h"
+#include "Op/include/Round.h"
+#include "Op/include/Rsqrt.h"
+#include "Op/include/ScatterNd.h"
+#include "Op/include/SegmentSum.h"
+#include "Op/include/Select.h"
+#include "Op/include/SelectV2.h"
+#include "Op/include/Shape.h"
+#include "Op/include/Sin.h"
+#include "Op/include/Slice.h"
+#include "Op/include/Softmax.h"
+#include "Op/include/SpaceToBatchND.h"
+#include "Op/include/SpaceToDepth.h"
+#include "Op/include/SparseToDense.h"
+#include "Op/include/Split.h"
+#include "Op/include/SplitV.h"
+#include "Op/include/Sqrt.h"
+#include "Op/include/Square.h"
+#include "Op/include/SquaredDifference.h"
+#include "Op/include/Squeeze.h"
+#include "Op/include/StridedSlice.h"
+#include "Op/include/Sub.h"
+#include "Op/include/Sum.h"
+#include "Op/include/SVDF.h"
+#include "Op/include/Tanh.h"
+#include "Op/include/Tile.h"
+#include "Op/include/TopKV2.h"
+#include "Op/include/Transpose.h"
+#include "Op/include/TransposeConv.h"
+#include "Op/include/UnidirectionalSequenceLSTM.h"
+#include "Op/include/Unique.h"
+#include "Op/include/Unpack.h"
+#include "Op/include/Where.h"
+#include "Op/include/ZerosLike.h"
#endif // __TFLITE_OP_CHEFS_H__
diff --git a/compiler/tflchef/tools/file/Driver.cpp b/compiler/tflchef/tools/file/Driver.cpp
index d4605ced3..f6c6789bd 100644
--- a/compiler/tflchef/tools/file/Driver.cpp
+++ b/compiler/tflchef/tools/file/Driver.cpp
@@ -28,10 +28,8 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("recipe")
- .type(arser::DataType::STR)
- .help("Source recipe file path to convert");
- arser.add_argument("tflite").type(arser::DataType::STR).help("Target tflite file path");
+ arser.add_argument("recipe").help("Source recipe file path to convert");
+ arser.add_argument("tflite").help("Target tflite file path");
try
{
diff --git a/compiler/tflchef/tools/reverse/Driver.cpp b/compiler/tflchef/tools/reverse/Driver.cpp
index 1451e8bb8..119bee6be 100644
--- a/compiler/tflchef/tools/reverse/Driver.cpp
+++ b/compiler/tflchef/tools/reverse/Driver.cpp
@@ -25,10 +25,8 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("tflite")
- .type(arser::DataType::STR)
- .help("Source tflite file path to convert");
- arser.add_argument("recipe").type(arser::DataType::STR).help("Target recipe file path");
+ arser.add_argument("tflite").help("Source tflite file path to convert");
+ arser.add_argument("recipe").help("Target recipe file path");
try
{
diff --git a/compiler/tfldump/CMakeLists.txt b/compiler/tfldump/CMakeLists.txt
index fac0be6bf..410232645 100644
--- a/compiler/tfldump/CMakeLists.txt
+++ b/compiler/tfldump/CMakeLists.txt
@@ -10,6 +10,7 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
add_executable(tfldump ${DRIVER} ${SOURCES})
target_include_directories(tfldump PRIVATE include)
target_link_libraries(tfldump arser)
+target_link_libraries(tfldump foder)
target_link_libraries(tfldump mio_tflite280)
target_link_libraries(tfldump mio_tflite280_helper)
target_link_libraries(tfldump safemain)
diff --git a/compiler/tfldump/driver/Driver.cpp b/compiler/tfldump/driver/Driver.cpp
index 38c9c062f..a3e748be1 100644
--- a/compiler/tfldump/driver/Driver.cpp
+++ b/compiler/tfldump/driver/Driver.cpp
@@ -15,7 +15,7 @@
*/
#include <arser/arser.h>
-#include <tflread/Model.h>
+#include <foder/FileLoader.h>
#include <tfldump/Dump.h>
#include <iostream>
@@ -23,7 +23,7 @@
int entry(int argc, char **argv)
{
arser::Arser arser;
- arser.add_argument("tflite").type(arser::DataType::STR).help("TFLite file to dump");
+ arser.add_argument("tflite").help("TFLite file to dump");
try
{
@@ -38,14 +38,9 @@ int entry(int argc, char **argv)
std::string tflite_path = arser.get<std::string>("tflite");
// Load TF lite model from a tflite file
- std::unique_ptr<tflread::Model> model = tflread::load_tflite(tflite_path);
- if (model == nullptr)
- {
- std::cerr << "ERROR: Failed to load tflite '" << tflite_path << "'" << std::endl;
- return 255;
- }
-
- const tflite::Model *tflmodel = model->model();
+ foder::FileLoader fileLoader{tflite_path};
+ std::vector<char> modelData = fileLoader.load();
+ const tflite::Model *tflmodel = tflite::GetModel(modelData.data());
if (tflmodel == nullptr)
{
std::cerr << "ERROR: Failed to load tflite '" << tflite_path << "'" << std::endl;
diff --git a/compiler/tfldump/include/tflread/Model.h b/compiler/tfldump/include/tflread/Model.h
deleted file mode 100644
index c6e4a94ac..000000000
--- a/compiler/tfldump/include/tflread/Model.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __TFLREAD_MODEL_H__
-#define __TFLREAD_MODEL_H__
-
-#include <mio/tflite/schema_generated.h>
-
-#include <memory>
-
-namespace tflread
-{
-
-struct Model
-{
- virtual ~Model() = default;
-
- virtual const ::tflite::Model *model(void) const = 0;
-};
-
-/**
- * @brief Load TensorFlow Lite model (as a raw Model) from a given path
- *
- * @note May return a nullptr
- */
-std::unique_ptr<Model> load_tflite(const std::string &path);
-
-} // namespace tflread
-
-#endif // __TFLREAD_MODEL_H__
diff --git a/compiler/tfldump/requires.cmake b/compiler/tfldump/requires.cmake
index b1abf9486..a11f6b200 100644
--- a/compiler/tfldump/requires.cmake
+++ b/compiler/tfldump/requires.cmake
@@ -1,3 +1,4 @@
require("arser")
+require("foder")
require("mio-tflite280")
require("safemain")
diff --git a/compiler/tfldump/src/Dump.cpp b/compiler/tfldump/src/Dump.cpp
index 2a87e47d7..4388fcde8 100644
--- a/compiler/tfldump/src/Dump.cpp
+++ b/compiler/tfldump/src/Dump.cpp
@@ -33,7 +33,7 @@ void dump_buffer(std::ostream &os, const uint8_t *buffer, size_t size, size_t am
std::ios_base::fmtflags saveflags(os.flags());
bool second = false;
- bool ellipsis = amount > 0 && size > 4;
+ bool ellipsis = amount > 0 && size > 8;
size_t count = ellipsis ? std::min(size, amount) : size;
for (size_t i = 0; i < count; i++)
@@ -103,8 +103,8 @@ std::ostream &operator<<(std::ostream &os, const flatbuffers::Vector<T> *fbvect)
if (fbvect == nullptr)
return os;
- bool ellipsis = (fbvect->size() > 4);
- auto limit_size = ellipsis ? 4 : fbvect->size();
+ bool ellipsis = (fbvect->size() > 8);
+ auto limit_size = ellipsis ? 8 : fbvect->size();
if (ellipsis)
{
diff --git a/compiler/tfldump/src/Load.cpp b/compiler/tfldump/src/Load.cpp
deleted file mode 100644
index d2f6e06f1..000000000
--- a/compiler/tfldump/src/Load.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <tflread/Model.h>
-
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-
-namespace
-{
-
-class MemoryMappedModel final : public tflread::Model
-{
-public:
- /**
- * @require fd and data SHOULD be valid
- */
- explicit MemoryMappedModel(int fd, void *data, size_t size) : _fd{fd}, _data{data}, _size{size}
- {
- // DO NOTHING
- }
-
-public:
- ~MemoryMappedModel()
- {
- munmap(_data, _size);
- close(_fd);
- }
-
-public:
- MemoryMappedModel(const MemoryMappedModel &) = delete;
- MemoryMappedModel(MemoryMappedModel &&) = delete;
-
-public:
- const ::tflite::Model *model(void) const override { return ::tflite::GetModel(_data); }
-
-private:
- int _fd = -1;
- void *_data = nullptr;
- size_t _size = 0;
-};
-
-class FileDescriptor final
-{
-public:
- FileDescriptor(int value) : _value{value}
- {
- // DO NOTHING
- }
-
-public:
- // NOTE Copy is not allowed
- FileDescriptor(const FileDescriptor &) = delete;
-
-public:
- // NOTE Move is allowed
- FileDescriptor(FileDescriptor &&fd) { _value = fd.release(); }
-
-public:
- ~FileDescriptor()
- {
- if (_value != -1)
- {
- // Close on destructor
- close(_value);
- }
- }
-
-public:
- int value(void) const { return _value; }
-
-public:
- int release(void)
- {
- auto res = _value;
- _value = -1;
- return res;
- }
-
-private:
- int _value = -1;
-};
-
-} // namespace
-
-namespace tflread
-{
-
-std::unique_ptr<Model> load_tflite(const std::string &path)
-{
- FileDescriptor fd = open(path.c_str(), O_RDONLY);
-
- if (fd.value() == -1)
- {
- // Return nullptr on open failure
- return nullptr;
- }
-
- struct stat st;
- if (fstat(fd.value(), &st) == -1)
- {
- // Return nullptr on fstat failure
- return nullptr;
- }
-
- auto size = st.st_size;
- auto data = mmap(nullptr, size, PROT_READ, MAP_SHARED, fd.value(), 0);
-
- if (data == MAP_FAILED)
- {
- // Return nullptr on mmap failure
- return nullptr;
- }
-
- return std::unique_ptr<tflread::Model>{new MemoryMappedModel(fd.release(), data, size)};
-}
-
-} // namespace tflread
diff --git a/compiler/tfldump/src/OpPrinter.cpp b/compiler/tfldump/src/OpPrinter.cpp
index 47edcb086..2e8e7134f 100644
--- a/compiler/tfldump/src/OpPrinter.cpp
+++ b/compiler/tfldump/src/OpPrinter.cpp
@@ -736,6 +736,7 @@ OpPrinterRegistry::OpPrinterRegistry()
// There is no Option for CEIL
_op_map[tflite::BuiltinOperator_CONCATENATION] = make_unique<ConcatenationPrinter>();
_op_map[tflite::BuiltinOperator_CONV_2D] = make_unique<Conv2DPrinter>();
+ // There is no Option for DENSIFY
_op_map[tflite::BuiltinOperator_DEPTH_TO_SPACE] = make_unique<DepthToSpacePrinter>();
_op_map[tflite::BuiltinOperator_DEPTHWISE_CONV_2D] = make_unique<DepthwiseConv2DPrinter>();
// There is no Option for DEQUANTIZE
diff --git a/compiler/tflite2circle-conversion-test/CMakeLists.txt b/compiler/tflite2circle-conversion-test/CMakeLists.txt
index 83fe23a8f..2e67d48bd 100644
--- a/compiler/tflite2circle-conversion-test/CMakeLists.txt
+++ b/compiler/tflite2circle-conversion-test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
nnas_include(TargetRequire)
unset(REQUIRED_TARGETS)
diff --git a/compiler/tflite2circle/driver/Driver.cpp b/compiler/tflite2circle/driver/Driver.cpp
index fb8c211b6..6afe1b0f2 100644
--- a/compiler/tflite2circle/driver/Driver.cpp
+++ b/compiler/tflite2circle/driver/Driver.cpp
@@ -36,24 +36,11 @@ int entry(int argc, char **argv)
{
arser::Arser arser{"tflite2circle is a Tensorflow lite to circle model converter"};
- arser.add_argument("--version")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("Show version information and exit")
- .exit_with(print_version);
-
- arser.add_argument("-V", "--verbose")
- .nargs(0)
- .required(false)
- .default_value(false)
- .help("output additional information to stdout or stderr");
-
- arser.add_argument("tflite")
- .nargs(1)
- .type(arser::DataType::STR)
- .help("Source tflite file path to convert");
- arser.add_argument("circle").nargs(1).type(arser::DataType::STR).help("Target circle file path");
+ arser::Helper::add_version(arser, print_version);
+ arser::Helper::add_verbose(arser);
+
+ arser.add_argument("tflite").help("Source tflite file path to convert");
+ arser.add_argument("circle").help("Target circle file path");
try
{
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions.h
index 88a4f71df..8149197f6 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions.h
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions.h
@@ -31,8 +31,10 @@
#include "BuildBuiltinOptions/ConcatenationOptions.h"
#include "BuildBuiltinOptions/Conv2DOptions.h"
#include "BuildBuiltinOptions/CosOptions.h"
+#include "BuildBuiltinOptions/DensifyOptions.h"
#include "BuildBuiltinOptions/DepthToSpaceOptions.h"
#include "BuildBuiltinOptions/DepthwiseConv2DOptions.h"
+#include "BuildBuiltinOptions/DequantizeOptions.h"
#include "BuildBuiltinOptions/DivOptions.h"
#include "BuildBuiltinOptions/EqualOptions.h"
#include "BuildBuiltinOptions/ExpandDimsOptions.h"
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.cpp
new file mode 100644
index 000000000..4e5863576
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DensifyOptions.h"
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::DensifyOptions>
+build_circle_DensifyOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *)
+{
+ circle::DensifyOptionsBuilder builtin_options_builder{fb};
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.h
new file mode 100644
index 000000000..b6126c4e2
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DensifyOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_DENSIFY_OPTIONS_H__
+#define __BBO_DENSIFY_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::DensifyOptions>
+build_circle_DensifyOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_DENSIFY_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.cpp
new file mode 100644
index 000000000..eeacece6a
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DequantizeOptions.h"
+#include "DataLookup.h"
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::DequantizeOptions>
+build_circle_DequantizeOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+ circle::DequantizeOptionsBuilder builtin_options_builder{fb};
+ return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.h
new file mode 100644
index 000000000..1cb9f9c1a
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/DequantizeOptions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_DEQUANTIZE_OPTIONS_H__
+#define __BBO_DEQUANTIZE_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::DequantizeOptions>
+build_circle_DequantizeOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_DEQUANTIZE_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/MaximumMinimumOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/MaximumMinimumOptions.cpp
index d2d2888f2..db88d3e82 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions/MaximumMinimumOptions.cpp
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/MaximumMinimumOptions.cpp
@@ -25,8 +25,6 @@ namespace tflite2circle
flatbuffers::Offset<circle::MaximumMinimumOptions>
build_circle_MaximumMinimumOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
{
- auto tflite_builtin_options = op->builtin_options_as_MaximumMinimumOptions();
- assert(tflite_builtin_options);
circle::MaximumMinimumOptionsBuilder builtin_options_builder{fb};
return builtin_options_builder.Finish();
}
diff --git a/compiler/tflite2circle/src/CircleModel.cpp b/compiler/tflite2circle/src/CircleModel.cpp
index d483b288f..ac017b8f1 100644
--- a/compiler/tflite2circle/src/CircleModel.cpp
+++ b/compiler/tflite2circle/src/CircleModel.cpp
@@ -344,8 +344,13 @@ template <> void Offset<OperatorCodeLink>::build(const TFLFlatBufVec *tflite_fla
circle::OperatorCodeBuilder operator_code_builder{*_fb};
auto de_code = it->deprecated_builtin_code();
auto bt_code = it->builtin_code();
- operator_code_builder.add_deprecated_builtin_code(get_circle_builtin_code(de_code));
- operator_code_builder.add_builtin_code(get_circle_builtin_code(bt_code));
+ auto cir_de_code = get_circle_builtin_code(de_code);
+ auto cir_bt_code = get_circle_builtin_code(bt_code);
+ // correct bt_code where bt_code == 0 for old tflite format
+ if (cir_bt_code == 0)
+ cir_bt_code = static_cast<circle::BuiltinOperator>(cir_de_code);
+ operator_code_builder.add_deprecated_builtin_code(cir_de_code);
+ operator_code_builder.add_builtin_code(cir_bt_code);
operator_code_builder.add_custom_code(custom_code);
operator_code_builder.add_version(it->version());
auto code = operator_code_builder.Finish();
diff --git a/compiler/tflite2circle/src/TFLBuiltinOptions.lst b/compiler/tflite2circle/src/TFLBuiltinOptions.lst
index d55ba464a..9cbf8032a 100644
--- a/compiler/tflite2circle/src/TFLBuiltinOptions.lst
+++ b/compiler/tflite2circle/src/TFLBuiltinOptions.lst
@@ -42,7 +42,7 @@ TFL_BUILTIN_OPTIONS(TopKV2Options)
TFL_BUILTIN_OPTIONS(SplitOptions)
TFL_BUILTIN_OPTIONS(LogSoftmaxOptions)
TFL_BUILTIN_OPTIONS(CastOptions)
-//TFL_BUILTIN_OPTIONS(DequantizeOptions)
+TFL_BUILTIN_OPTIONS(DequantizeOptions)
TFL_BUILTIN_OPTIONS(MaximumMinimumOptions)
TFL_BUILTIN_OPTIONS(ArgMaxOptions)
TFL_BUILTIN_OPTIONS(LessOptions)
@@ -106,3 +106,4 @@ TFL_BUILTIN_OPTIONS(RankOptions)
TFL_BUILTIN_OPTIONS(ScatterNdOptions)
TFL_BUILTIN_OPTIONS(SegmentSumOptions)
TFL_BUILTIN_OPTIONS(BatchMatMulOptions)
+TFL_BUILTIN_OPTIONS(DensifyOptions)
diff --git a/compiler/vconone/CMakeLists.txt b/compiler/vconone/CMakeLists.txt
index 3841a1b78..93c33cdbd 100644
--- a/compiler/vconone/CMakeLists.txt
+++ b/compiler/vconone/CMakeLists.txt
@@ -1,5 +1,5 @@
if (NOT VCONONE_VERSION)
- set(VCONONE_VERSION 0x0000000000140001)
+ set(VCONONE_VERSION 0x0000000000150001)
# NOTE order is [build patch minor major]
# if VCONONE_VERSION is set with -D option, it will be cached
# you may have to remove cache file if you remove -D option
diff --git a/compiler/vconone/src/version.cpp b/compiler/vconone/src/version.cpp
index d94a7ada6..cebf7d998 100644
--- a/compiler/vconone/src/version.cpp
+++ b/compiler/vconone/src/version.cpp
@@ -54,7 +54,7 @@ std::string get_string(void)
std::string get_copyright(void)
{
std::string str;
- str = "Copyright (c) 2020-2021 Samsung Electronics Co., Ltd. All Rights Reserved\r\n";
+ str = "Copyright (c) 2020-2022 Samsung Electronics Co., Ltd. All Rights Reserved\r\n";
str += "Licensed under the Apache License, Version 2.0\r\n";
str += "https://github.com/Samsung/ONE";
return str;
diff --git a/compute/ARMComputeEx/CMakeLists.txt b/compute/ARMComputeEx/CMakeLists.txt
index 58f558db2..c8d12c249 100644
--- a/compute/ARMComputeEx/CMakeLists.txt
+++ b/compute/ARMComputeEx/CMakeLists.txt
@@ -14,7 +14,7 @@ file(GLOB_RECURSE ACL_EX_SRCS "${ACL_EX_BASE}/*.cpp")
# generate embeded cl_kernel
execute_process (
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
- COMMAND bash -c "python resolve_includes.py"
+ COMMAND bash -c "python3 resolve_includes.py"
)
add_library(arm_compute_ex SHARED ${ACL_EX_SRCS})
diff --git a/compute/cker/CMakeLists.txt b/compute/cker/CMakeLists.txt
index 09f67259c..9b3cd4f36 100644
--- a/compute/cker/CMakeLists.txt
+++ b/compute/cker/CMakeLists.txt
@@ -17,3 +17,20 @@ target_include_directories(nnfw_lib_cker INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/i
# Workaround to avoid warning
# TODO Resolve warning
target_compile_options(nnfw_lib_cker INTERFACE -Wno-attributes)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+set(TEST_CKER test_cker)
+
+file(GLOB_RECURSE TESTS "src/*.test.cc")
+
+add_executable(${TEST_CKER} ${TESTS})
+
+target_link_libraries(${TEST_CKER} nnfw_lib_cker)
+target_link_libraries(${TEST_CKER} nnfw_coverage)
+target_link_libraries(${TEST_CKER} gtest gtest_main ${LIB_PTHREAD})
+
+add_test(${TEST_CKER} ${TEST_CKER})
+install(TARGETS ${TEST_CKER} DESTINATION unittest_standalone)
diff --git a/compute/cker/include/cker/CpuBackendThreadpool.h b/compute/cker/include/cker/CpuBackendThreadpool.h
index cc6a9dbfc..8ec6140bd 100644
--- a/compute/cker/include/cker/CpuBackendThreadpool.h
+++ b/compute/cker/include/cker/CpuBackendThreadpool.h
@@ -21,6 +21,8 @@
#include <ruy/context.h> // from @ruy
#include <ruy/thread_pool.h> // from @ruy
+#include <stdexcept>
+
namespace nnfw
{
namespace cker
@@ -33,7 +35,12 @@ using Task = ruy::Task;
template <typename TaskType>
void Execute(int tasks_count, TaskType *tasks, ruy::Context *ruy_context)
{
+ assert(ruy_context != nullptr);
assert(tasks_count <= ruy_context->max_num_threads());
+ if (ruy_context == nullptr)
+ {
+ throw std::runtime_error("CpuBackendThreadpool.h: ruy::Context is null");
+ }
ruy_context->mutable_thread_pool()->Execute(tasks_count, tasks);
}
diff --git a/compute/cker/include/cker/NeonTensorUtils.h b/compute/cker/include/cker/NeonTensorUtils.h
index 8bf0bee03..45ad969c3 100644
--- a/compute/cker/include/cker/NeonTensorUtils.h
+++ b/compute/cker/include/cker/NeonTensorUtils.h
@@ -632,7 +632,7 @@ inline void NeonCpuBackendGemm(const int8_t *input, const int32_t *bias,
ruy_support::MakeRuyMatrix(rhs_params, input, &ruy_rhs, true);
ruy_support::MakeRuyMatrix(dst_params, scratch, &ruy_dst);
- ruy::BasicSpec<int32_t, int32_t> ruy_mul_params;
+ ruy::MulParams<int32_t, int32_t> ruy_mul_params;
ruy_support::MakeRuyMulParams(gemm_params, &ruy_mul_params);
ruy::Mul(ruy_lhs, ruy_rhs, ruy_mul_params, ruy_context, &ruy_dst);
diff --git a/compute/cker/include/cker/operation/Conv.h b/compute/cker/include/cker/operation/Conv.h
index 16c937a27..7cd54dcd5 100644
--- a/compute/cker/include/cker/operation/Conv.h
+++ b/compute/cker/include/cker/operation/Conv.h
@@ -57,9 +57,9 @@ class Conv
public:
Conv() : _modified_filter_data(), _im2col_shape(4), _need_im2col(false), _prepared(false) {}
- void prepare(const Shape &filter_shape, const float *filter_data, PaddingType padding_type,
- bool &is_replaced_weights, uint32_t dilationWidthFactor,
- uint32_t dilationHeightFactor)
+ void prepareF32(const Shape &filter_shape, const float *filter_data, PaddingType padding_type,
+ bool &is_replaced_weights, uint32_t dilationWidthFactor,
+ uint32_t dilationHeightFactor)
{
if (!_prepared)
{
@@ -71,9 +71,9 @@ public:
}
}
- void prepareQuant(const Shape &input_shape, const Shape &kernel_shape, const Shape &output_shape,
- uint32_t stride_width, uint32_t stride_height, uint32_t dilation_width_factor,
- uint32_t dilation_height_factor)
+ void prepareQ8uPerTensor(const Shape &input_shape, const Shape &kernel_shape,
+ const Shape &output_shape, uint32_t stride_width, uint32_t stride_height,
+ uint32_t dilation_width_factor, uint32_t dilation_height_factor)
{
if (!_prepared)
{
@@ -138,13 +138,25 @@ public:
}
}
+ void operator()(const ConvParams &params, const Shape &input_shape, const uint8_t *input_data,
+ const Shape &filter_shape, const uint8_t *filter_data,
+ const int32_t *filter_zero_point, const Shape &bias_shape,
+ const int32_t *bias_data, const Shape &output_shape, uint8_t *output_data)
+ {
+ reference::Conv<uint8_t, true>(params, _per_channel_output_multiplier.data(),
+ _per_channel_output_shift.data(), input_shape, input_data,
+ filter_shape, filter_data, filter_zero_point, bias_shape,
+ bias_data, output_shape, output_data);
+ }
+
void operator()(const ConvParams &params, const Shape &input_shape, const int8_t *input_data,
const Shape &filter_shape, const int8_t *filter_data, const Shape &bias_shape,
const int32_t *bias_data, const Shape &output_shape, int8_t *output_data)
{
- reference::Conv(params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
- input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data,
- output_shape, output_data);
+ reference::Conv<int8_t, false>(params, _per_channel_output_multiplier.data(),
+ _per_channel_output_shift.data(), input_shape, input_data,
+ filter_shape, filter_data, nullptr /* filter_zero_point */,
+ bias_shape, bias_data, output_shape, output_data);
}
std::vector<int32_t> &per_channel_output_multiplier() { return _per_channel_output_multiplier; }
std::vector<int> &per_channel_output_shift() { return _per_channel_output_shift; }
diff --git a/compute/cker/include/cker/operation/DepthwiseConv.h b/compute/cker/include/cker/operation/DepthwiseConv.h
index 06ee780bb..ed1f93d44 100644
--- a/compute/cker/include/cker/operation/DepthwiseConv.h
+++ b/compute/cker/include/cker/operation/DepthwiseConv.h
@@ -25,6 +25,7 @@
#include "cker/operation/optimized/DepthwiseConvFloat.h"
#include "cker/operation/optimized/DepthwiseConvUint8.h"
#include "cker/operation/optimized/integer_ops/DepthwiseConvInt8.h"
+#include "cker/operation/reference/integer_ops/DepthwiseConvUInt8.h"
#include "cker/CpuBackendThreadpool.h"
namespace nnfw
diff --git a/compute/cker/include/cker/operation/reference/Conv.h b/compute/cker/include/cker/operation/reference/Conv.h
index 4474754af..8bfd4694e 100644
--- a/compute/cker/include/cker/operation/reference/Conv.h
+++ b/compute/cker/include/cker/operation/reference/Conv.h
@@ -190,10 +190,13 @@ inline void Conv(const ConvParams &params, const Shape &input_shape, const uint8
}
}
+template <typename T, bool is_asymmetric>
inline void Conv(const ConvParams &params, const int32_t *output_multiplier,
- const int32_t *output_shift, const Shape &input_shape, const int8_t *input_data,
- const Shape &filter_shape, const int8_t *filter_data, const Shape &bias_shape,
- const int32_t *bias_data, const Shape &output_shape, int8_t *output_data)
+ const int32_t *output_shift, const Shape &input_shape, const T *input_data,
+ const Shape &filter_shape, const T *filter_data, const int32_t *filter_zeropoint,
+ const Shape &bias_shape, const int32_t *bias_data, const Shape &output_shape,
+ T *output_data)
+
{
UNUSED_RELEASE(bias_shape);
// Get parameters.
@@ -259,26 +262,35 @@ inline void Conv(const ConvParams &params, const int32_t *output_multiplier,
for (int in_channel = 0; in_channel < input_depth; ++in_channel)
{
- int32_t input_val = input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
- int32_t filter_val =
+ const T input_val = input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
+ const T filter_val =
filter_data[Offset(filter_shape, out_channel, filter_y, filter_x, in_channel)];
- // Accumulate with 32 bits accumulator.
- // In the nudging process during model quantization, we force
- // real value of 0.0 be represented by a quantized value. This
- // guarantees that the input_offset is a int8_t, even though
- // it is represented using int32_t. int32_t += int8_t *
- // (int8_t - int8_t) so the highest value we can get from each
- // accumulation is [-127, 127] * ([-128, 127] -
- // [-128, 127]), which is [-32512, 32512]. log2(32512)
- // = 14.98, which means we can accumulate at least 2^16
- // multiplications without overflow. The accumulator is
- // applied to a filter so the accumulation logic will hold as
- // long as the filter size (filter_y * filter_x * in_channel)
- // does not exceed 2^16, which is the case in all the models
- // we have seen so far.
- // TODO(jianlijianli): Add a check to make sure the
- // accumulator depth is smaller than 2^16.
- acc += filter_val * (input_val + input_offset);
+ if (is_asymmetric)
+ {
+ const int32_t filter_offset = -filter_zeropoint[out_channel];
+ acc += (filter_val + filter_offset) * (input_val + input_offset);
+ }
+ else
+ {
+ // Accumulate with 32 bits accumulator.
+ // In the nudging process during model quantization, we force
+ // real value of 0.0 be represented by a quantized value. This
+ // guarantees that the input_offset is a int8_t, even though
+ // it is represented using int32_t. int32_t += int8_t *
+ // (int8_t - int8_t) so the highest value we can get from each
+ // accumulation is [-127, 127] * ([-128, 127] -
+ // [-128, 127]), which is [-32512, 32512]. log2(32512)
+ // = 14.98, which means we can accumulate at least 2^16
+ // multiplications without overflow. The accumulator is
+ // applied to a filter so the accumulation logic will hold as
+ // long as the filter size (filter_y * filter_x * in_channel)
+ // does not exceed 2^16, which is the case in all the models
+ // we have seen so far.
+ // TODO(jianlijianli): Add a check to make sure the
+ // accumulator depth is smaller than 2^16.
+ acc += filter_val * (input_val + input_offset);
+ UNUSED_RELEASE(filter_zeropoint);
+ }
}
}
}
@@ -292,8 +304,7 @@ inline void Conv(const ConvParams &params, const int32_t *output_multiplier,
acc += output_offset;
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
- output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
- static_cast<int8_t>(acc);
+ output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = static_cast<T>(acc);
}
}
}
diff --git a/compute/cker/include/cker/operation/reference/integer_ops/DepthwiseConvUInt8.h b/compute/cker/include/cker/operation/reference/integer_ops/DepthwiseConvUInt8.h
new file mode 100644
index 000000000..025e40705
--- /dev/null
+++ b/compute/cker/include/cker/operation/reference/integer_ops/DepthwiseConvUInt8.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_CKER_REFERENCE_DEPTHWISE_CONV_UINT8_H__
+#define __NNFW_CKER_REFERENCE_DEPTHWISE_CONV_UINT8_H__
+
+#include "cker/Shape.h"
+#include "cker/Types.h"
+#include "cker/Utils.h"
+
+namespace nnfw
+{
+namespace cker
+{
+namespace reference_integer_ops
+{
+inline void DepthwiseConvPerChannel(const DepthwiseConvParams &params,
+ const int32_t *output_multiplier, const int32_t *output_shift,
+ const Shape &input_shape, const uint8_t *input_data,
+ const Shape &filter_shape, const uint8_t *filter_data,
+ const int32_t *filter_zeropoint, const Shape &bias_shape,
+ const int32_t *bias_data, const Shape &output_shape,
+ uint8_t *output_data)
+{
+ // Get parameters.
+ // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro.
+ const int stride_width = params.stride_width;
+ const int stride_height = params.stride_height;
+ const int dilation_width_factor = params.dilation_width_factor;
+ const int dilation_height_factor = params.dilation_height_factor;
+ const int pad_width = params.padding_values.width;
+ const int pad_height = params.padding_values.height;
+ const int depth_multiplier = params.depth_multiplier;
+ const int32_t input_offset = params.input_offset;
+ const int32_t output_offset = params.output_offset;
+ const int32_t output_activation_min = params.quantized_activation_min;
+ const int32_t output_activation_max = params.quantized_activation_max;
+
+ // Check dimensions of the tensors.
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+ assert(output_activation_min <= output_activation_max);
+ const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+ const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3);
+ const int input_height = input_shape.Dims(1);
+ const int input_width = input_shape.Dims(2);
+ const int input_depth = input_shape.Dims(3);
+ const int filter_height = filter_shape.Dims(1);
+ const int filter_width = filter_shape.Dims(2);
+ const int output_height = output_shape.Dims(1);
+ const int output_width = output_shape.Dims(2);
+ UNUSED_RELEASE(output_depth);
+ UNUSED_RELEASE(bias_shape);
+ assert(output_depth == input_depth * depth_multiplier);
+ assert(bias_shape.FlatSize() == output_depth);
+
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int in_channel = 0; in_channel < input_depth; ++in_channel)
+ {
+ for (int m = 0; m < depth_multiplier; ++m)
+ {
+ const int output_channel = m + in_channel * depth_multiplier;
+ const int in_x_origin = (out_x * stride_width) - pad_width;
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+ int32_t acc = 0;
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int in_x = in_x_origin + dilation_width_factor * filter_x;
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ // Zero padding by omitting the areas outside the image.
+ const bool is_point_inside_image =
+ (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
+ if (is_point_inside_image)
+ {
+ uint8_t input_val =
+ input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
+ uint8_t filter_val =
+ filter_data[Offset(filter_shape, 0, filter_y, filter_x, output_channel)];
+
+ // { for per-channel
+ // NOTE: The following comment is copied from tflite int8 implementation
+ // It may not be 100% true for uint8 per-channel.
+ //
+ // Accumulate with 32 bits accumulator.
+ // In the nudging process during model quantization, we force
+ // real value of 0.0 be represented by a quantized value. This
+ // guarantees that the input_offset is a int8, even though it
+ // is represented using int32_t.
+ // int32 += int8 * (int8 - int8) so the highest value we can
+ // get from each accumulation is [-127, 127] * ([-128, 127] -
+ // [-128, 127]), which is [-32512, 32512]. log2(32512)
+ // = 14.98, which means we can accumulate at least 2^16
+ // multiplications without overflow. The accumulator is
+ // applied to a filter so the accumulation logic will hold as
+ // long as the filter size (filter_y * filter_x * in_channel)
+ // does not exceed 2^16, which is the case in all the models
+ // we have seen so far.
+ // TODO(jianlijianli): Add a check to make sure the
+ // accumulator depth is smaller than 2^16.
+ const int32_t filter_offset = -filter_zeropoint[output_channel];
+ acc += (filter_val + filter_offset) * (input_val + input_offset);
+ // } for per-channel
+ }
+ }
+ }
+ if (bias_data)
+ {
+ acc += bias_data[output_channel];
+ }
+ acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[output_channel],
+ output_shift[output_channel]);
+ acc += output_offset;
+ acc = std::max(acc, output_activation_min);
+ acc = std::min(acc, output_activation_max);
+ // For q8u per-channel, int8_t -> uint8_t
+ output_data[Offset(output_shape, batch, out_y, out_x, output_channel)] =
+ static_cast<uint8_t>(acc);
+ }
+ }
+ }
+ }
+ }
+}
+
+} // namespace reference_integer_ops
+} // namespace cker
+} // namespace nnfw
+
+#endif // __NNFW_CKER_REFERENCE_DEPTHWISE_CONV_UINT8_H__
diff --git a/compute/cker/include/cker/ruy/RuySupport.h b/compute/cker/include/cker/ruy/RuySupport.h
index 62eeaf6bd..14489a804 100644
--- a/compute/cker/include/cker/ruy/RuySupport.h
+++ b/compute/cker/include/cker/ruy/RuySupport.h
@@ -64,23 +64,35 @@ void MakeRuyMatrix(const MatrixParams<Scalar> &params, DataPointer data_ptr,
}
}
-template <typename GemmParamsType, typename RuySpecType>
-void MakeRuyMulParams(const GemmParamsType &params, RuySpecType *ruy_mul_params)
+// Integer-quantized case with destination type narrower than int32
+template <typename DstScalar, QuantizationFlavor quantization_flavor>
+void MakeRuyMulParams(const GemmParams<std::int32_t, DstScalar, quantization_flavor> &params,
+ ruy::MulParams<std::int32_t, DstScalar> *ruy_mul_params)
{
- // This validation has already been performed by the Gemm API entry point,
- // but it doesn't hurt to test specifically this again here, where it's
- // being used.
- ValidateGemmParams(params);
-
- ruy_mul_params->set_multiplier_fixedpoint(params.multiplier_fixedpoint);
- ruy_mul_params->set_multiplier_exponent(params.multiplier_exponent);
- ruy_mul_params->set_multiplier_fixedpoint_perchannel(params.multiplier_fixedpoint_perchannel);
- ruy_mul_params->set_multiplier_exponent_perchannel(params.multiplier_exponent_perchannel);
+ static_assert(sizeof(DstScalar) < sizeof(std::int32_t), "");
+ if (quantization_flavor == QuantizationFlavor::kIntegerWithUniformMultiplier)
+ {
+ ruy_mul_params->set_multiplier_fixedpoint(params.multiplier_fixedpoint);
+ ruy_mul_params->set_multiplier_exponent(params.multiplier_exponent);
+ }
+ if (quantization_flavor == QuantizationFlavor::kIntegerWithPerRowMultiplier)
+ {
+ ruy_mul_params->set_multiplier_fixedpoint_perchannel(params.multiplier_fixedpoint_perchannel);
+ ruy_mul_params->set_multiplier_exponent_perchannel(params.multiplier_exponent_perchannel);
+ }
ruy_mul_params->set_bias(params.bias);
ruy_mul_params->set_clamp_min(params.clamp_min);
ruy_mul_params->set_clamp_max(params.clamp_max);
}
+// Raw-integer case with destination type int32.
+template <QuantizationFlavor quantization_flavor>
+void MakeRuyMulParams(const GemmParams<std::int32_t, std::int32_t, quantization_flavor> &params,
+ ruy::MulParams<std::int32_t, std::int32_t> *ruy_mul_params)
+{
+ ruy_mul_params->set_bias(params.bias);
+}
+
} // namespace ruy_support
} // namespace cker
} // namespace nnfw
diff --git a/compute/test/cker/Range.cc b/compute/cker/src/Range.test.cc
index e5fe4801f..e5fe4801f 100644
--- a/compute/test/cker/Range.cc
+++ b/compute/cker/src/Range.test.cc
diff --git a/compute/ruy/include/ruy/RuySupport.h b/compute/ruy/include/ruy/RuySupport.h
index 7086a96c4..2f9ed7457 100644
--- a/compute/ruy/include/ruy/RuySupport.h
+++ b/compute/ruy/include/ruy/RuySupport.h
@@ -64,23 +64,46 @@ void MakeRuyMatrix(const MatrixParams<Scalar> &params, DataPointer data_ptr,
}
}
-template <typename GemmParamsType, typename RuySpecType>
-void MakeRuyMulParams(const GemmParamsType &params, RuySpecType *ruy_mul_params)
+// Floating-point case.
+template <typename AccumScalar, typename DstScalar, QuantizationFlavor quantization_flavor>
+void MakeRuyMulParams(const GemmParams<AccumScalar, DstScalar, quantization_flavor> &params,
+ ::ruy::MulParams<AccumScalar, DstScalar> *ruy_mul_params)
{
- // This validation has already been performed by the Gemm API entry point,
- // but it doesn't hurt to test specifically this again here, where it's
- // being used.
- ValidateGemmParams(params);
+ static_assert(quantization_flavor == QuantizationFlavor::kFloatingPoint, "");
+ ruy_mul_params->set_bias(params.bias);
+ ruy_mul_params->set_clamp_min(params.clamp_min);
+ ruy_mul_params->set_clamp_max(params.clamp_max);
+}
- ruy_mul_params->set_multiplier_fixedpoint(params.multiplier_fixedpoint);
- ruy_mul_params->set_multiplier_exponent(params.multiplier_exponent);
- ruy_mul_params->set_multiplier_fixedpoint_perchannel(params.multiplier_fixedpoint_perchannel);
- ruy_mul_params->set_multiplier_exponent_perchannel(params.multiplier_exponent_perchannel);
+// Integer-quantized case with destination type narrower than int32
+template <typename DstScalar, QuantizationFlavor quantization_flavor>
+void MakeRuyMulParams(const GemmParams<std::int32_t, DstScalar, quantization_flavor> &params,
+ ::ruy::MulParams<std::int32_t, DstScalar> *ruy_mul_params)
+{
+ static_assert(sizeof(DstScalar) < sizeof(std::int32_t), "");
+ if (quantization_flavor == QuantizationFlavor::kIntegerWithUniformMultiplier)
+ {
+ ruy_mul_params->set_multiplier_fixedpoint(params.multiplier_fixedpoint);
+ ruy_mul_params->set_multiplier_exponent(params.multiplier_exponent);
+ }
+ if (quantization_flavor == QuantizationFlavor::kIntegerWithPerRowMultiplier)
+ {
+ ruy_mul_params->set_multiplier_fixedpoint_perchannel(params.multiplier_fixedpoint_perchannel);
+ ruy_mul_params->set_multiplier_exponent_perchannel(params.multiplier_exponent_perchannel);
+ }
ruy_mul_params->set_bias(params.bias);
ruy_mul_params->set_clamp_min(params.clamp_min);
ruy_mul_params->set_clamp_max(params.clamp_max);
}
+// Raw-integer case with destination type int32.
+template <QuantizationFlavor quantization_flavor>
+void MakeRuyMulParams(const GemmParams<std::int32_t, std::int32_t, quantization_flavor> &params,
+ ::ruy::MulParams<std::int32_t, std::int32_t> *ruy_mul_params)
+{
+ ruy_mul_params->set_bias(params.bias);
+}
+
} // namespace ruy_support
} // namespace ruy
} // namespace nnfw
diff --git a/compute/ruy/include/ruy/operation/Conv.h b/compute/ruy/include/ruy/operation/Conv.h
index 2b9c8c390..3f03694bd 100644
--- a/compute/ruy/include/ruy/operation/Conv.h
+++ b/compute/ruy/include/ruy/operation/Conv.h
@@ -169,7 +169,7 @@ private:
ruy_support::MakeRuyMatrix(rhs_params, gemm_input_data, &ruy_rhs, true);
ruy_support::MakeRuyMatrix(dst_params, output_data, &ruy_dst);
- ::ruy::BasicSpec<float, float> ruy_mul_params;
+ ::ruy::MulParams<float, float> ruy_mul_params;
ruy_support::MakeRuyMulParams(gemm_params, &ruy_mul_params);
::ruy::Mul(ruy_lhs, ruy_rhs, ruy_mul_params, ruy_context, &ruy_dst);
diff --git a/compute/ruy/include/ruy/operation/FullyConnected.h b/compute/ruy/include/ruy/operation/FullyConnected.h
index 59facdb22..1d686b64b 100644
--- a/compute/ruy/include/ruy/operation/FullyConnected.h
+++ b/compute/ruy/include/ruy/operation/FullyConnected.h
@@ -68,7 +68,7 @@ inline void FullyConnected(const FullyConnectedParams &params, const Shape &inpu
ruy_support::MakeRuyMatrix(rhs_params, input_data, &ruy_rhs, true);
ruy_support::MakeRuyMatrix(dst_params, output_data, &ruy_dst);
- ::ruy::BasicSpec<float, float> ruy_mul_params;
+ ::ruy::MulParams<float, float> ruy_mul_params;
ruy_support::MakeRuyMulParams(gemm_params, &ruy_mul_params);
::ruy::Mul(ruy_lhs, ruy_rhs, ruy_mul_params, ruy_context, &ruy_dst);
diff --git a/compute/test/CMakeLists.txt b/compute/test/CMakeLists.txt
deleted file mode 100644
index 92aac3e72..000000000
--- a/compute/test/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-if(NOT ENABLE_TEST)
- return()
-endif(NOT ENABLE_TEST)
-
-set(TEST_COMPUTE test_compute)
-
-file(GLOB_RECURSE TESTS "*.cc")
-
-add_executable(${TEST_COMPUTE} ${TESTS})
-
-target_link_libraries(${TEST_COMPUTE} nnfw_lib_cker)
-target_link_libraries(${TEST_COMPUTE} gtest)
-target_link_libraries(${TEST_COMPUTE} gtest_main)
-target_link_libraries(${TEST_COMPUTE} ${LIB_PTHREAD} dl)
-add_test(${TEST_COMPUTE} ${TEST_COMPUTE})
-
-install(TARGETS ${TEST_COMPUTE} DESTINATION unittest_standalone)
diff --git a/docs/conf.py b/docs/conf.py
index 84197e6d6..409e5f79b 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -21,7 +21,7 @@ copyright = '2020, Samsung Research & contributors'
author = 'Samsung Research & contributors'
# The full version, including alpha/beta/rc tags
-release = '1.20.0'
+release = '1.21.0'
# -- General configuration ---------------------------------------------------
diff --git a/docs/howto/how-to-build-runtime-tizen-gbs-rpi4.md b/docs/howto/how-to-build-runtime-tizen-gbs-rpi4.md
index 1f8c0c289..57b2b787c 100644
--- a/docs/howto/how-to-build-runtime-tizen-gbs-rpi4.md
+++ b/docs/howto/how-to-build-runtime-tizen-gbs-rpi4.md
@@ -174,34 +174,26 @@ $ vi j2/etc/systemd/system/ip.service
and set as like:
```
[Service]
-Type=simple
Restart=always
RestartSec=1
User=root
-ExecStart=/bin/sh /bin/ip.sh
+ExecStart=/bin/sh -c "ifconfig eth0 192.168.x.y netmask 255.255.255.0 up"
[Install]
WantedBy=multi-user.target
```
+Replace 192.168.x.y with your actual IP address.
-(5-3) Add a new file
-```
-$ vi j2/bin/ip.sh
-```
-and set with IP address for your RPi4:
-```
-ifconfig eth0 192.168.x.y netmask 255.255.255.0 up
-```
-where you should update `192.168.x.y` part to your actual IP address.
-(5-4) Add a symbolic link
+(5-3) Add a symbolic link
```
+$ sudo mkdir -p j2/etc/systemd/system/multi-user.target.wants/
$ pushd j2/etc/systemd/system/multi-user.target.wants/
$ sudo ln -s ../../system/ip.service .
$ popd
```
-(5-5) Now that every thing is ready, unmount and unplug your memory card and plug into
+(5-4) Now that everything is ready, unmount and unplug your memory card and plug into
RPi4, turn on the power.
```
$ sync
diff --git a/docs/release/1.20/index.rst b/docs/release/1.20/index.rst
new file mode 100644
index 000000000..082d867f3
--- /dev/null
+++ b/docs/release/1.20/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Tue Apr 26 10:18:12 2022.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.20
+====
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.20.0.md
diff --git a/docs/release/1.20/release-note-1.20.0.md b/docs/release/1.20/release-note-1.20.0.md
new file mode 100644
index 000000000..2c75e06af
--- /dev/null
+++ b/docs/release/1.20/release-note-1.20.0.md
@@ -0,0 +1,34 @@
+# Release Note 1.20.0
+
+## ONE Compiler
+
+### Compiler Frontend
+
+- luci-interpreter supports multiple kernels with PAL layer including Cortex-M
+- luci-interpreter supports integer tensors for some kernels
+- luci import supports constants without copying to reduce memory for luci-interpreter
+- Reduce duplicate code to package released modules
+- Limited support for ONNX LSTM/RNN unrolling while importing
+- Limited support for ARM32 cross build
+- Support new operator: SVDF
+- New virtual CircleVariable to support tensor with variable
+- Support quantization of BatchMatMul Op
+- Support mixed(UINT8 + INT16) quantization
+- Support backward propagation of quantization parameters
+- Upgrade default python to version 3.8
+- Support TensorFlow 2.8.0, ONNX-TF 1.10.0, ONNX 1.11.0
+- Upgrade circle schema to follow tflite schema v3b
+- Refactor to mio-tflite280, mio-circle04 with version and helpers methods
+- Use one flatbuffers 2.0 version
+- Drop support for TensorFlow 1.x
+- Fix for several bugs, performance enhancements, and typos
+
+## ONE Runtime
+
+### Introduce TRIX backend
+- TRIX backend supports trix binary with NHWC layout
+- TRIX backend supports trix binary with input/output of Q8 and Q16 type
+
+### API supports new data type
+- Symmetric Quantized int16 type named "NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED"
+
diff --git a/docs/release/1.21/index.rst b/docs/release/1.21/index.rst
new file mode 100644
index 000000000..587065f56
--- /dev/null
+++ b/docs/release/1.21/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Wed Sep 06 12:18:12 2022.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.21
+====
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.21.0.md
diff --git a/docs/release/1.21/release-note-1.21.0.md b/docs/release/1.21/release-note-1.21.0.md
new file mode 100644
index 000000000..49bf074b6
--- /dev/null
+++ b/docs/release/1.21/release-note-1.21.0.md
@@ -0,0 +1,35 @@
+# Release Note 1.21.0
+
+## ONE Compiler
+
+- Support unrolling of LSTM and RNN Ops in `one-import-onnx` tool
+- Introduced new tools `one-infer`, `circle-operator`, `circle-interpreter`
+- Introduced `Workflow`(WIP) in `one-cmds`
+- New option `quant_config` in `one-quantize`
+- New option `fake_quantize` in `one-quantize`
+- More Ops supported: Densify
+- More Ops for quantization: ReduceMax
+- More Ops for mixed-precision quantization (MPQ): LeakyRelu, Neg, Relu6, Squeeze
+- More Ops for `convert_nchw_to_nhwc` option: LogSoftmax, ReduceMax, SplitV, Softmax
+- New optimization options in `one-optimize`: `replace_non_const_fc_with_bmm`, `resolve_customop_splitv`, `fold_densify`
+- Improved reshape elimination in `convert_nchw_to_nhwc` option.
+- Support fusion of Channel-wise Add + Relu with TConv
+- Support negative axis in ArgMin/Max
+- Show errors for unrecognized options in `one-optimize`
+- Fix shape inference for `StridedSlice`
+- Fix FuseBatchNormWithTConvPass to support TConv with bias
+- Deprecate `--O1` option in `circle2circle`
+- Support gcc-11
+- Support limited Float16 for kernels constants with dequantization to Float32
+
+## ONE Runtime
+
+### Basic Multimodel nnpackage
+- Runtime supports running nnpackage with two models
+
+### Channel Wise Quantization on Conv2D and Depthwise Conv2D
+- Conv2D and Depthwise Conv2D support per-channel quantization of uint8 type.
+
+### Batch Execution with TRIX backend
+- TRIX backend supports batch execution which runs in parallel on multiple cores
+
diff --git a/infra/cmake/modules/IdentifyPlatform.cmake b/infra/cmake/modules/IdentifyPlatform.cmake
index 6616283fb..890055fae 100644
--- a/infra/cmake/modules/IdentifyPlatform.cmake
+++ b/infra/cmake/modules/IdentifyPlatform.cmake
@@ -35,6 +35,8 @@ endif()
if("${HOST_ARCH}" STREQUAL "x86_64")
set(HOST_ARCH_BASE ${HOST_ARCH})
+elseif("${HOST_ARCH}" STREQUAL "armv7em")
+ set(HOST_ARCH_BASE "arm")
elseif("${HOST_ARCH}" STREQUAL "armv7l")
set(HOST_ARCH_BASE "arm")
elseif("${HOST_ARCH}" STREQUAL "armv7hl")
@@ -49,6 +51,8 @@ endif()
if("${TARGET_ARCH}" STREQUAL "x86_64")
set(TARGET_ARCH_BASE ${TARGET_ARCH})
+elseif("${TARGET_ARCH}" STREQUAL "armv7em")
+ set(TARGET_ARCH_BASE "arm")
elseif("${TARGET_ARCH}" STREQUAL "armv7l")
set(TARGET_ARCH_BASE "arm")
elseif("${TARGET_ARCH}" STREQUAL "armv7hl")
diff --git a/infra/cmake/packages/AbseilConfig.cmake b/infra/cmake/packages/AbseilConfig.cmake
index 6fae7211d..b3cb364e1 100644
--- a/infra/cmake/packages/AbseilConfig.cmake
+++ b/infra/cmake/packages/AbseilConfig.cmake
@@ -12,11 +12,18 @@ function(_Abseil_import)
# NOTE Turn off abseil testing
set(BUILD_TESTING OFF)
+ # Set -fPIC property because Abseil-cpp can be used for shared library
+ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+ # Abseil-cpp 20211102.0 show warning without below setting
+ set(ABSL_PROPAGATE_CXX_STD ON)
+
add_extdirectory("${AbseilSource_DIR}" ABSEIL)
add_library(abseil INTERFACE)
+
target_link_libraries(abseil INTERFACE
# From "Available Abseil CMake Public Targets" in CMake/README.md
+ # Add absl::status (It is not listed in CMake/README.md)
absl::algorithm
absl::base
absl::debugging
@@ -27,19 +34,14 @@ function(_Abseil_import)
absl::numeric
absl::random_random
absl::strings
- absl::status
absl::synchronization
absl::time
absl::utility
+ absl::status
)
endif(NOT TARGET abseil)
set(Abseil_FOUND TRUE PARENT_SCOPE)
endfunction(_Abseil_import)
-set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -fPIC")
-set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fPIC")
-set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -fPIC")
-set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fPIC")
-
_Abseil_import()
diff --git a/infra/cmake/packages/AbseilSourceConfig.cmake b/infra/cmake/packages/AbseilSourceConfig.cmake
index 8aeb86db3..0297c08bc 100644
--- a/infra/cmake/packages/AbseilSourceConfig.cmake
+++ b/infra/cmake/packages/AbseilSourceConfig.cmake
@@ -7,14 +7,13 @@ function(_AbseilSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- # NOTE TensorFlow 2.3 downloads abseil from the following URL
+ # NOTE TensorFlow 2.9 downloads abseil 20211102.0
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
- envoption(ABSEIL_URL ${EXTERNAL_DOWNLOAD_SERVER}/abseil/abseil-cpp/archive/df3ea785d8c30a9503321a3d35ee7d35808f190d.tar.gz)
-
+ envoption(ABSEIL_URL ${EXTERNAL_DOWNLOAD_SERVER}/abseil/abseil-cpp/archive/20211102.0.tar.gz)
ExternalSource_Download(ABSEIL
DIRNAME ABSEIL
URL ${ABSEIL_URL}
- CHECKSUM MD5=4d9aa7e757adf48fef171c85f0d88552)
+ CHECKSUM MD5=bdca561519192543378b7cade101ec43)
set(AbseilSource_DIR ${ABSEIL_SOURCE_DIR} PARENT_SCOPE)
set(AbseilSource_FOUND TRUE PARENT_SCOPE)
diff --git a/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake
index 99118c5d9..d1588d3fd 100644
--- a/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake
+++ b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake
@@ -2,7 +2,8 @@ function(_CMSISSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(CMSIS_5_8_0_URL https://github.com/ARM-software/CMSIS_5/archive/refs/tags/5.8.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(CMSIS_5_8_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/CMSIS_5/archive/refs/tags/5.8.0.tar.gz)
set(CMSIS_5_8_0_SHA256 fe6b697b8782e7fd6131034b7646a3b65c83018774abf7f9f94901a3bc7c82ad)
ExternalSource_Download(CMSIS DIRNAME CMSIS-5.8.0 ${CMSIS_5_8_0_URL}
diff --git a/infra/cmake/packages/CaffeSourceConfig.cmake b/infra/cmake/packages/CaffeSourceConfig.cmake
index 41cc2c9f7..05eb5b30e 100644
--- a/infra/cmake/packages/CaffeSourceConfig.cmake
+++ b/infra/cmake/packages/CaffeSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_CaffeSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(CAFFE_URL https://github.com/BVLC/caffe/archive/1.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(CAFFE_URL ${EXTERNAL_DOWNLOAD_SERVER}/BVLC/caffe/archive/1.0.tar.gz)
ExternalSource_Download(CAFFE ${CAFFE_URL})
diff --git a/infra/cmake/packages/CpuInfoSourceConfig.cmake b/infra/cmake/packages/CpuInfoSourceConfig.cmake
index 60419ad9f..b93a6a2e5 100644
--- a/infra/cmake/packages/CpuInfoSourceConfig.cmake
+++ b/infra/cmake/packages/CpuInfoSourceConfig.cmake
@@ -8,8 +8,8 @@ function(_CpuInfoSource_import)
nnas_include(OptionTools)
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
- # CPUINFO commit including patch from tflite v2.3
- envoption(CPUINFO_URL ${EXTERNAL_DOWNLOAD_SERVER}/pytorch/cpuinfo/archive/63b254577ed77a8004a9be6ac707f3dccc4e1fd9.tar.gz)
+ # CPUINFO commit from tflite v2.8
+ envoption(CPUINFO_URL ${EXTERNAL_DOWNLOAD_SERVER}/pytorch/cpuinfo/archive/5916273f79a21551890fd3d56fc5375a78d1598d.tar.gz)
ExternalSource_Download(CPUINFO
DIRNAME CPUINFO
URL ${CPUINFO_URL})
diff --git a/infra/cmake/packages/Egl_HeadersSourceConfig.cmake b/infra/cmake/packages/Egl_HeadersSourceConfig.cmake
new file mode 100644
index 000000000..fae57f6ce
--- /dev/null
+++ b/infra/cmake/packages/Egl_HeadersSourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_Egl_HeadersSource_import)
+ if(NOT DOWNLOAD_EGL_HEADERS)
+ set(Egl_HeadersSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_EGL_HEADERS)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(EGL_HEADERS_URL ${EXTERNAL_DOWNLOAD_SERVER}/KhronosGroup/EGL-Registry/archive/649981109e263b737e7735933c90626c29a306f2.zip)
+
+ ExternalSource_Download(EGL_HEADERS
+ DIRNAME EGL_HEADERS
+ URL ${EGL_HEADERS_URL})
+
+ set(Egl_HeadersSource_DIR ${EGL_HEADERS_SOURCE_DIR} PARENT_SCOPE)
+ set(Egl_HeadersSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_Egl_HeadersSource_import)
+
+_Egl_HeadersSource_import()
diff --git a/infra/cmake/packages/FarmhashSourceConfig.cmake b/infra/cmake/packages/FarmhashSourceConfig.cmake
index a19c8b992..fa1867c5c 100644
--- a/infra/cmake/packages/FarmhashSourceConfig.cmake
+++ b/infra/cmake/packages/FarmhashSourceConfig.cmake
@@ -10,7 +10,8 @@ function(_FarmhashSource_import)
# NOTE TensorFlow 1.12 downloads farmhash from the following URL
# TensorFlow 1.13.1 downloads farmhash from the following URL
# TensorFlow 2.3.0 downloads farmhash from the following URL
- envoption(FARMHASH_1_12_URL https://github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(FARMHASH_1_12_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz)
ExternalSource_Download(FARMHASH ${FARMHASH_1_12_URL})
diff --git a/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake b/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake
index a0a32aa9e..e094055b7 100644
--- a/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake
+++ b/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_FlatBuffersSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(FLATBUFFERS_2_0_URL https://github.com/google/flatbuffers/archive/v2.0.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(FLATBUFFERS_2_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/flatbuffers/archive/v2.0.0.tar.gz)
ExternalSource_Download(FLATBUFFERS
DIRNAME FLATBUFFERS-2.0
CHECKSUM MD5=a27992324c3cbf86dd888268a23d17bd
diff --git a/infra/cmake/packages/Fp16SourceConfig.cmake b/infra/cmake/packages/Fp16SourceConfig.cmake
index 3623fd210..3df4e4cc5 100644
--- a/infra/cmake/packages/Fp16SourceConfig.cmake
+++ b/infra/cmake/packages/Fp16SourceConfig.cmake
@@ -9,7 +9,7 @@ function(_Fp16Source_import)
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
# fp16 commit in xnnpack 8b283aa30a31
- envoption(FP16_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/FP16/archive/3c54eacb74f6f5e39077300c5564156c424d77ba.tar.gz)
+ envoption(FP16_URL ${EXTERNAL_DOWNLOAD_SERVER}/Maratyszcza/FP16/archive/4dfe081cf6bcd15db339cf2680b9281b8451eeb3.tar.gz)
ExternalSource_Download(FP16
DIRNAME FP16
URL ${FP16_URL})
diff --git a/infra/cmake/packages/GEMMLowpSourceConfig.cmake b/infra/cmake/packages/GEMMLowpSourceConfig.cmake
index 6e1cfa9c9..3b3560359 100644
--- a/infra/cmake/packages/GEMMLowpSourceConfig.cmake
+++ b/infra/cmake/packages/GEMMLowpSourceConfig.cmake
@@ -9,7 +9,8 @@ function(_GEMMLowpSource_import)
# NOTE TensorFlow 1.12 uses the following URL
# TensorFlow 1.13.1 uses the following URL
- envoption(GEMMLOWP_URL https://github.com/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(GEMMLOWP_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.tar.gz)
ExternalSource_Download(GEMMLOWP ${GEMMLOWP_URL})
diff --git a/infra/cmake/packages/GFlagsSourceConfig.cmake b/infra/cmake/packages/GFlagsSourceConfig.cmake
index 3e70d89fc..2f9b7537f 100644
--- a/infra/cmake/packages/GFlagsSourceConfig.cmake
+++ b/infra/cmake/packages/GFlagsSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_GFlagsSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(GFLAGS_URL https://github.com/gflags/gflags/archive/v2.2.1.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(GFLAGS_URL ${EXTERNAL_DOWNLOAD_SERVER}/gflags/gflags/archive/v2.2.1.tar.gz)
ExternalSource_Download(GFLAGS ${GFLAGS_URL})
diff --git a/infra/cmake/packages/GTestSourceConfig.cmake b/infra/cmake/packages/GTestSourceConfig.cmake
index e57d0965a..643c3d109 100644
--- a/infra/cmake/packages/GTestSourceConfig.cmake
+++ b/infra/cmake/packages/GTestSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_GTestSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(GTEST_URL https://github.com/google/googletest/archive/release-1.11.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(GTEST_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/googletest/archive/release-1.11.0.tar.gz)
ExternalSource_Download(GTEST ${GTEST_URL})
diff --git a/infra/cmake/packages/HDF5SourceConfig.cmake b/infra/cmake/packages/HDF5SourceConfig.cmake
index 9db048c86..3440dbd20 100644
--- a/infra/cmake/packages/HDF5SourceConfig.cmake
+++ b/infra/cmake/packages/HDF5SourceConfig.cmake
@@ -7,7 +7,8 @@ function(_HDF5Source_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(HDF5_URL https://github.com/HDFGroup/hdf5/archive/hdf5-1_8_16.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(HDF5_URL ${EXTERNAL_DOWNLOAD_SERVER}/HDFGroup/hdf5/archive/hdf5-1_8_16.tar.gz)
ExternalSource_Download(HDF5 ${HDF5_URL}
PATCH ${CMAKE_CURRENT_LIST_DIR}/HDF5Source.patch)
diff --git a/infra/cmake/packages/JsoncppSourceConfig.cmake b/infra/cmake/packages/JsoncppSourceConfig.cmake
index 3195ea479..8d672854b 100644
--- a/infra/cmake/packages/JsoncppSourceConfig.cmake
+++ b/infra/cmake/packages/JsoncppSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_JsoncppSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(JSONCPP_URL https://github.com/open-source-parsers/jsoncpp/archive/refs/tags/1.9.5.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(JSONCPP_URL ${EXTERNAL_DOWNLOAD_SERVER}/open-source-parsers/jsoncpp/archive/refs/tags/1.9.5.tar.gz)
ExternalSource_Download(JSONCPP ${JSONCPP_URL})
diff --git a/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake
index 805554538..e55647da8 100644
--- a/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake
+++ b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake
@@ -2,7 +2,8 @@ function(_MbedOSSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(MBEDOS_6_15_URL https://github.com/ARMmbed/mbed-os/archive/refs/tags/mbed-os-6.15.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(MBEDOS_6_15_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARMmbed/mbed-os/archive/refs/tags/mbed-os-6.15.0.tar.gz)
set(MBEDOS_6_15_SHA256 529b04c41f3020ed8a62f12d47f2d3de87e1b07fb13708534534a587f7ea048e)
ExternalSource_Download(MBEDOS DIRNAME MBEDOS-6.15 ${MBEDOS_6_15_URL}
diff --git a/infra/cmake/packages/NEON2SSESourceConfig.cmake b/infra/cmake/packages/NEON2SSESourceConfig.cmake
index bd40267a5..82c71e2a8 100644
--- a/infra/cmake/packages/NEON2SSESourceConfig.cmake
+++ b/infra/cmake/packages/NEON2SSESourceConfig.cmake
@@ -8,10 +8,10 @@ function(_NEON2SSESource_import)
nnas_include(OptionTools)
# NOTE TensorFlow 1.13.1 downloads NEON2SSE from the following URL
- # NOTE TensorFlow 2.1 downloads NEON2SSE from the following URL
- # NOTE TensorFlow 2.2 downloads NEON2SSE from the following URL
- # NOTE TensorFlow 2.3 downloads NEON2SSE from the following URL
- envoption(NEON2SSE_URL https://github.com/intel/ARM_NEON_2_x86_SSE/archive/1200fe90bb174a6224a525ee60148671a786a71f.tar.gz)
+ # NOTE TensorFlow 2.8.0 downloads NEON2SSE from the following URL
+ # NOTE commit c12f8932c3be5aebaf35562d699f645686c4e2c3 will resolve build fail on debug build
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(NEON2SSE_URL ${EXTERNAL_DOWNLOAD_SERVER}/intel/ARM_NEON_2_x86_SSE/archive/1200fe90bb174a6224a525ee60148671a786a71f.tar.gz)
ExternalSource_Download(NEON2SSE ${NEON2SSE_URL})
diff --git a/infra/cmake/packages/ONNXSource-1.4.1/ONNXSourceConfig.cmake b/infra/cmake/packages/ONNXSource-1.4.1/ONNXSourceConfig.cmake
index c9fb5e490..fe21f6d3d 100644
--- a/infra/cmake/packages/ONNXSource-1.4.1/ONNXSourceConfig.cmake
+++ b/infra/cmake/packages/ONNXSource-1.4.1/ONNXSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_ONNXSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(ONNX_1_4_1_URL https://github.com/onnx/onnx/archive/v1.4.1.zip)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(ONNX_1_4_1_URL ${EXTERNAL_DOWNLOAD_SERVER}/onnx/onnx/archive/v1.4.1.zip)
ExternalSource_Download(ONNX DIRNAME ONNX-1.4.1
CHECKSUM MD5=604b43a22fbc758f32ae9f3a4fb9d397
diff --git a/infra/cmake/packages/ONNXSource-1.6.0/ONNXSourceConfig.cmake b/infra/cmake/packages/ONNXSource-1.6.0/ONNXSourceConfig.cmake
index ef903f834..b2ad08b90 100644
--- a/infra/cmake/packages/ONNXSource-1.6.0/ONNXSourceConfig.cmake
+++ b/infra/cmake/packages/ONNXSource-1.6.0/ONNXSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_ONNXSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(ONNX_1_6_0_URL https://github.com/onnx/onnx/archive/v1.6.0.zip)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(ONNX_1_6_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/onnx/onnx/archive/v1.6.0.zip)
ExternalSource_Download(ONNX DIRNAME ONNX-1.6.0
CHECKSUM MD5=cbdc547a527f1b59c7f066c8d258b966
diff --git a/infra/cmake/packages/OouraFFTSourceConfig.cmake b/infra/cmake/packages/OouraFFTSourceConfig.cmake
index be551fbe4..d84b5b20f 100644
--- a/infra/cmake/packages/OouraFFTSourceConfig.cmake
+++ b/infra/cmake/packages/OouraFFTSourceConfig.cmake
@@ -8,7 +8,8 @@ function(_OouraFFTSource_import)
nnas_include(OptionTools)
# NOTE TensorFlow 2.3 downloads OOURAFFT from the following URL
- envoption(OOURAFFT_URL https://github.com/petewarden/OouraFFT/archive/v1.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(OOURAFFT_URL ${EXTERNAL_DOWNLOAD_SERVER}/petewarden/OouraFFT/archive/v1.0.tar.gz)
ExternalSource_Download(OOURAFFT ${OOURAFFT_URL})
diff --git a/infra/cmake/packages/Opengl_HeadersSourceConfig.cmake b/infra/cmake/packages/Opengl_HeadersSourceConfig.cmake
new file mode 100644
index 000000000..c5a774a73
--- /dev/null
+++ b/infra/cmake/packages/Opengl_HeadersSourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_Opengl_HeadersSource_import)
+ if(NOT DOWNLOAD_OPENGL_HEADERS)
+ set(Opengl_HeadersSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_OPENGL_HEADERS)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(OPENGL_HEADERS_URL ${EXTERNAL_DOWNLOAD_SERVER}/KhronosGroup/OpenGL-Registry/archive/0cb0880d91581d34f96899c86fc1bf35627b4b81.zip)
+
+ ExternalSource_Download(OPENGL_HEADERS
+ DIRNAME OPENGL_HEADERS
+ URL ${OPENGL_HEADERS_URL})
+
+ set(Opengl_HeadersSource_DIR ${OPENGL_HEADERS_SOURCE_DIR} PARENT_SCOPE)
+ set(Opengl_HeadersSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_Opengl_HeadersSource_import)
+
+_Opengl_HeadersSource_import()
diff --git a/infra/cmake/packages/ProtobufSourceConfig.cmake b/infra/cmake/packages/ProtobufSourceConfig.cmake
index baa49eeb0..a1704e53d 100644
--- a/infra/cmake/packages/ProtobufSourceConfig.cmake
+++ b/infra/cmake/packages/ProtobufSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_ProtobufSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(PROTOBUF_URL https://github.com/protocolbuffers/protobuf/archive/v3.5.2.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(PROTOBUF_URL ${EXTERNAL_DOWNLOAD_SERVER}/protocolbuffers/protobuf/archive/v3.5.2.tar.gz)
ExternalSource_Download(PROTOBUF ${PROTOBUF_URL}
PATCH ${CMAKE_CURRENT_LIST_DIR}/ProtobufSource.patch)
diff --git a/infra/cmake/packages/Pybind11SourceConfig.cmake b/infra/cmake/packages/Pybind11SourceConfig.cmake
index 76f51e4d3..2f6425355 100644
--- a/infra/cmake/packages/Pybind11SourceConfig.cmake
+++ b/infra/cmake/packages/Pybind11SourceConfig.cmake
@@ -7,7 +7,8 @@ function(_Pybind11Source_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(PYBIND11_URL https://github.com/pybind/pybind11/archive/v2.5.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(PYBIND11_URL ${EXTERNAL_DOWNLOAD_SERVER}/pybind/pybind11/archive/v2.5.0.tar.gz)
ExternalSource_Download(PYBIND11 ${PYBIND11_URL})
diff --git a/infra/cmake/packages/PytorchSourceConfig.cmake b/infra/cmake/packages/PytorchSourceConfig.cmake
index 0212f2f4b..94757f865 100644
--- a/infra/cmake/packages/PytorchSourceConfig.cmake
+++ b/infra/cmake/packages/PytorchSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_PytorchSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(PYTORCH_URL https://github.com/pytorch/pytorch/archive/v0.4.1.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(PYTORCH_URL ${EXTERNAL_DOWNLOAD_SERVER}/pytorch/pytorch/archive/v0.4.1.tar.gz)
ExternalSource_Download(PYTORCH ${PYTORCH_URL})
diff --git a/infra/cmake/packages/TensorFlowEigenSource-2.1.0/TensorFlowEigenSourceConfig.cmake b/infra/cmake/packages/TensorFlowEigenSource-2.1.0/TensorFlowEigenSourceConfig.cmake
index f84675596..8120ebca2 100644
--- a/infra/cmake/packages/TensorFlowEigenSource-2.1.0/TensorFlowEigenSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowEigenSource-2.1.0/TensorFlowEigenSourceConfig.cmake
@@ -9,7 +9,8 @@ function(_TensorFlowEigenSource_import)
# Exact version used by TensorFlow v2.1.0.
# See tensorflow/tensorflow/workspace.bzl.
- envoption(TENSORFLOW_2_1_0_EIGEN_URL https://gitlab.com/libeigen/eigen/-/archive/4e696901f873a2347f76d931cf2f701e31e15d05/eigen-4e696901f873a2347f76d931cf2f701e31e15d05.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://gitlab.com")
+ envoption(TENSORFLOW_2_1_0_EIGEN_URL ${EXTERNAL_DOWNLOAD_SERVER}/libeigen/eigen/-/archive/4e696901f873a2347f76d931cf2f701e31e15d05/eigen-4e696901f873a2347f76d931cf2f701e31e15d05.tar.gz)
ExternalSource_Download(EIGEN DIRNAME TENSORFLOW-2.1.0-EIGEN ${TENSORFLOW_2_1_0_EIGEN_URL})
diff --git a/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfig.cmake b/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfig.cmake
new file mode 100644
index 000000000..6f59f0771
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_TensorFlowEigenSource_import)
+ if(NOT DOWNLOAD_EIGEN)
+ set(TensorFlowEigenSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_EIGEN)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # Exact version used by TensorFlow v2.8.0.
+ # See tensorflow/third_party/eigen3/workspace.bzl.
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://gitlab.com")
+ envoption(TENSORFLOW_2_8_0_EIGEN_URL ${EXTERNAL_DOWNLOAD_SERVER}/libeigen/eigen/-/archive/008ff3483a8c5604639e1c4d204eae30ad737af6/eigen-e1dd31ce174c3d26fbe38388f64b09d2adbd7557a59e90e6f545a288cc1755fc.tar.gz)
+
+ ExternalSource_Download(EIGEN DIRNAME TENSORFLOW-2.8.0-EIGEN ${TENSORFLOW_2_8_0_EIGEN_URL})
+
+ set(TensorFlowEigenSource_DIR ${EIGEN_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowEigenSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowEigenSource_import)
+
+_TensorFlowEigenSource_import()
diff --git a/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfigVersion.cmake
new file mode 100644
index 000000000..2ad2e241e
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowEigenSource-2.8.0/TensorFlowEigenSourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.8.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.1.0/TensorFlowGEMMLowpSourceConfig.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.1.0/TensorFlowGEMMLowpSourceConfig.cmake
index 035264fa9..421be6c66 100644
--- a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.1.0/TensorFlowGEMMLowpSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.1.0/TensorFlowGEMMLowpSourceConfig.cmake
@@ -9,7 +9,8 @@ function(_TensorFlowGEMMLowpSource_import)
# Exact version used by TensorFlow v2.1.0.
# See tensorflow/tensorflow/workspace.bzl.
- envoption(TENSORFLOW_2_1_0_GEMMLOWP_URL https://github.com/google/gemmlowp/archive/12fed0cd7cfcd9e169bf1925bc3a7a58725fdcc3.zip)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_1_0_GEMMLOWP_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/gemmlowp/archive/12fed0cd7cfcd9e169bf1925bc3a7a58725fdcc3.zip)
ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.1.0-GEMMLOWP ${TENSORFLOW_2_1_0_GEMMLOWP_URL})
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake
index bc13d6227..44c56a6be 100644
--- a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.3.0/TensorFlowGEMMLowpSourceConfig.cmake
@@ -9,7 +9,8 @@ function(_TensorFlowGEMMLowpSource_import)
# Exact version used by TensorFlow v2.3.0.
# See tensorflow/tensorflow/workspace.bzl.
- envoption(TENSORFLOW_2_3_0_GEMMLOWP_URL https://github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_3_0_GEMMLOWP_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip)
ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.3.0-GEMMLOWP ${TENSORFLOW_2_3_0_GEMMLOWP_URL})
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake
index b7f3148e8..76cdfdd6c 100644
--- a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake
@@ -9,7 +9,8 @@ function(_TensorFlowGEMMLowpSource_import)
# Exact version used by TensorFlow v2.6.0.
# See tensorflow/third_party/gemmlowp/workspace.bzl.
- envoption(TENSORFLOW_2_6_0_GEMMLOWP_URL https://github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_6_0_GEMMLOWP_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip)
ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.6.0-GEMMLOWP ${TENSORFLOW_2_6_0_GEMMLOWP_URL})
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake
index f3663cc78..3e17490c3 100644
--- a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake
@@ -9,7 +9,8 @@ function(_TensorFlowGEMMLowpSource_import)
# Exact version used by TensorFlow v2.8.0.
# See tensorflow/third_party/gemmlowp/workspace.bzl.
- envoption(TENSORFLOW_2_8_0_GEMMLOWP_URL https://github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_8_0_GEMMLOWP_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip)
ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.8.0-GEMMLOWP ${TENSORFLOW_2_8_0_GEMMLOWP_URL})
diff --git a/infra/cmake/packages/TensorFlowGpuSourceConfig.cmake b/infra/cmake/packages/TensorFlowGpuSourceConfig.cmake
index f1debe775..369816a5e 100644
--- a/infra/cmake/packages/TensorFlowGpuSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowGpuSourceConfig.cmake
@@ -13,7 +13,7 @@ function(_TensorFlowGpuSource_Import)
set(PATCH_DONE "TRUE")
endif()
endif()
-
+
if(${PATCH_DONE} STREQUAL "TRUE")
message(STATUS "Skip downloading TensorFlowGpuSource")
set(TENSORFLOWGPU_SOURCE_DIR "${NNAS_EXTERNALS_DIR}/TENSORFLOW_GPU" PARENT_SCOPE)
@@ -28,7 +28,8 @@ function(_TensorFlowGpuSource_Import)
# Download TFLite Source Code
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(TENSORFLOW_2_4_1_URL https://github.com/tensorflow/tensorflow/archive/v2.4.1.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_4_1_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.4.1.tar.gz)
ExternalSource_Download(TFLITE_GPU_DELEGATE DIRNAME TENSORFLOW-2.4.1 ${TENSORFLOW_2_4_1_URL})
# Patch for non used codes on onert backend/gpu_cl
diff --git a/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake b/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake
index 3dbf05ece..3a7dc893c 100644
--- a/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowRuySource-2.3.0/TensorFlowRuySourceConfig.cmake
@@ -9,7 +9,8 @@ function(_TensorFlowRuySource_import)
# Exact version used by TensorFlow v2.3.0.
# See tensorflow/third_party/ruy/workspace.bzl
- envoption(TENSORFLOW_2_3_0_RUY_URL https://github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_3_0_RUY_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip)
ExternalSource_Download(RUY DIRNAME TENSORFLOW-2.3.0-RUY ${TENSORFLOW_2_3_0_RUY_URL})
diff --git a/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake b/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake
index b4dee914f..e4dd4f2bf 100644
--- a/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake
@@ -9,7 +9,8 @@ function(_TensorFlowRuySource_import)
# Exact version used by TensorFlow v2.6.0.
# See tensorflow/third_party/ruy/workspace.bzl
- envoption(TENSORFLOW_2_6_0_RUY_URL https://github.com/google/ruy/archive/e6c1b8dc8a8b00ee74e7268aac8b18d7260ab1ce.zip)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_6_0_RUY_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/ruy/archive/e6c1b8dc8a8b00ee74e7268aac8b18d7260ab1ce.zip)
ExternalSource_Download(RUY DIRNAME TENSORFLOW-2.6.0-RUY ${TENSORFLOW_2_6_0_RUY_URL})
diff --git a/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfig.cmake b/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfig.cmake
new file mode 100644
index 000000000..2ead7cd51
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_TensorFlowRuySource_import)
+ if(NOT DOWNLOAD_RUY)
+ set(TensorFlowRuySource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_RUY)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # Exact version used by TensorFlow v2.8.0.
+ # See tensorflow/third_party/ruy/workspace.bzl
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_8_0_RUY_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/ruy/archive/e6c1b8dc8a8b00ee74e7268aac8b18d7260ab1ce.zip)
+
+ ExternalSource_Download(RUY DIRNAME TENSORFLOW-2.8.0-RUY ${TENSORFLOW_2_8_0_RUY_URL})
+
+ set(TensorFlowRuySource_DIR ${RUY_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowRuySource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowRuySource_import)
+
+_TensorFlowRuySource_import()
diff --git a/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfigVersion.cmake
new file mode 100644
index 000000000..2ad2e241e
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowRuySource-2.8.0/TensorFlowRuySourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.8.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowSource-1.14/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-1.14/TensorFlowSourceConfig.cmake
index bcdf9f28c..33538c234 100644
--- a/infra/cmake/packages/TensorFlowSource-1.14/TensorFlowSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowSource-1.14/TensorFlowSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_TensorFlowSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(TENSORFLOW_1_14_URL https://github.com/tensorflow/tensorflow/archive/v1.14.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_1_14_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v1.14.0.tar.gz)
ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-1.14 ${TENSORFLOW_1_14_URL})
diff --git a/infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfig.cmake
index 0d2a95056..aabc22f72 100644
--- a/infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowSource-2.1.0/TensorFlowSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_TensorFlowSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(TENSORFLOW_2_1_0_URL https://github.com/tensorflow/tensorflow/archive/v2.1.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_1_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.1.0.tar.gz)
ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.1.0 ${TENSORFLOW_2_1_0_URL})
diff --git a/infra/cmake/packages/TensorFlowSource-2.2.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.2.0/TensorFlowSourceConfig.cmake
index 71220d743..7dabf88c8 100644
--- a/infra/cmake/packages/TensorFlowSource-2.2.0/TensorFlowSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowSource-2.2.0/TensorFlowSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_TensorFlowSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(TENSORFLOW_2_2_0_URL https://github.com/tensorflow/tensorflow/archive/v2.2.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_2_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.2.0.tar.gz)
ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.2.0 ${TENSORFLOW_2_2_0_URL})
diff --git a/infra/cmake/packages/TensorFlowSource-2.3.0-rc0Config.cmake b/infra/cmake/packages/TensorFlowSource-2.3.0-rc0Config.cmake
index 82df579a1..967d49e87 100644
--- a/infra/cmake/packages/TensorFlowSource-2.3.0-rc0Config.cmake
+++ b/infra/cmake/packages/TensorFlowSource-2.3.0-rc0Config.cmake
@@ -10,7 +10,8 @@ function(_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(TENSORFLOW_2_3_0_RC0_URL https://github.com/tensorflow/tensorflow/archive/v2.3.0-rc0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_3_0_RC0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.3.0-rc0.tar.gz)
ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.3.0-RC0 ${TENSORFLOW_2_3_0_RC0_URL})
diff --git a/infra/cmake/packages/TensorFlowSource-2.3.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.3.0/TensorFlowSourceConfig.cmake
index 5c3a0f8cc..0ad0cda0b 100644
--- a/infra/cmake/packages/TensorFlowSource-2.3.0/TensorFlowSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowSource-2.3.0/TensorFlowSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_TensorFlowSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(TENSORFLOW_2_3_0_URL https://github.com/tensorflow/tensorflow/archive/v2.3.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_3_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.3.0.tar.gz)
ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.3.0 ${TENSORFLOW_2_3_0_URL})
diff --git a/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake
index 611c7c805..9a7af17b1 100644
--- a/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_TensorFlowSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(TENSORFLOW_2_6_0_URL https://github.com/tensorflow/tensorflow/archive/v2.6.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_6_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.6.0.tar.gz)
ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.6.0 ${TENSORFLOW_2_6_0_URL})
diff --git a/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake
index 4abe2eae6..988a0f49f 100644
--- a/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake
+++ b/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake
@@ -7,7 +7,8 @@ function(_TensorFlowSource_import)
nnas_include(ExternalSourceTools)
nnas_include(OptionTools)
- envoption(TENSORFLOW_2_8_0_URL https://github.com/tensorflow/tensorflow/archive/v2.8.0.tar.gz)
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(TENSORFLOW_2_8_0_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v2.8.0.tar.gz)
ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.8.0 ${TENSORFLOW_2_8_0_URL})
diff --git a/infra/cmake/packages/VulkanSourceConfig.cmake b/infra/cmake/packages/VulkanSourceConfig.cmake
new file mode 100644
index 000000000..76b69898e
--- /dev/null
+++ b/infra/cmake/packages/VulkanSourceConfig.cmake
@@ -0,0 +1,20 @@
+function(_VulkanSource_import)
+ if(NOT ${DOWNLOAD_VULKAN})
+ set(VulkanSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT ${DOWNLOAD_VULKAN})
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ envoption(VULKAN_URL ${EXTERNAL_DOWNLOAD_SERVER}/KhronosGroup/Vulkan-Headers/archive/ec2db85225ab410bc6829251bef6c578aaed5868.tar.gz)
+ ExternalSource_Download(VULKAN
+ DIRNAME VULKAN
+ URL ${VULKAN_URL})
+
+ set(VulkanSource_DIR ${VULKAN_SOURCE_DIR} PARENT_SCOPE)
+ set(VulkanSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_VulkanSource_import)
+
+_VulkanSource_import()
diff --git a/infra/command/format b/infra/command/format
index 5cf9606fa..993a6ad5a 100644
--- a/infra/command/format
+++ b/infra/command/format
@@ -154,11 +154,9 @@ function check_python_files() {
fi
# Check python files
- FILES_TO_CHECK_PYTHON=`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '\.py$'`
+ FILES_TO_CHECK_PYTHON=(`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '\.py$'`)
# Exceptional case: one-cmds don't have '.py' extension: ignore non-python source (cmake, etc) and ignore shell script: one-prepare-venv
- FILES_TO_CHECK_PYTHON=`echo "$FILES_TO_CHECK_PYTHON" | egrep -v '^compiler/one-cmds/.*\..*$' | egrep -v '^compiler/one-cmds/one-prepare-venv$'`
- # Transform to array
- FILES_TO_CHECK_PYTHON=($FILES_TO_CHECK_PYTHON)
+ FILES_TO_CHECK_PYTHON+=(`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '^compiler/one-cmds/[^(\./)]*$' | egrep -v '^compiler/one-cmds/one-prepare-venv$'`)
for s in ${DIRECTORIES_NOT_TO_BE_TESTED[@]}; do
skip=${s#'.'/}/
diff --git a/infra/command/gen-coverage-report b/infra/command/gen-coverage-report
index 3058aee9a..df6377d2a 100644
--- a/infra/command/gen-coverage-report
+++ b/infra/command/gen-coverage-report
@@ -69,10 +69,10 @@ done
opencl_files=($(find ./runtime/onert/backend/gpu_cl/open_cl/ \( -name "*.cc" -o -name "*.h" \) -exec realpath {} \; ))
-# Exclude *.test.cpp files from coverage report
+# Exclude test files from coverage report
# Exclude flatbuffer generated files from coverage report
"${LCOV_PATH}" -r "${EXTRACTED_COVERAGE_INFO_PATH}" -o "${EXCLUDED_COVERAGE_INFO_PATH}" \
- '*.test.cpp' '*_schema_generated.h' "${opencl_files[@]}"
+ '*.test.cpp' '*.test.cc' '*/test/*' '*/tests/*' '*_schema_generated.h' "${opencl_files[@]}"
# Final coverage data
cp -v ${EXCLUDED_COVERAGE_INFO_PATH} ${COVERAGE_INFO_PATH}
diff --git a/infra/debian/compiler/changelog b/infra/debian/compiler/changelog
index 2763ac55b..ddca70a17 100644
--- a/infra/debian/compiler/changelog
+++ b/infra/debian/compiler/changelog
@@ -1,3 +1,50 @@
+one (1.21.0) bionic; urgency=medium
+
+ * Support unrolling of LSTM and RNN Ops in `one-import-onnx` tool
+ * Introduced new tools `one-infer`, `circle-operator`, `circle-interpreter`
+ * Introduced `Workflow`(WIP) in `one-cmds`
+ * New option `quant_config` in `one-quantize`
+ * New option `fake_quantize` in `one-quantize`
+ * More Ops supported: Densify
+ * More Ops for quantization: ReduceMax
+ * More Ops for mixed-precision quantization (MPQ): LeakyRelu, Neg, Relu6, Squeeze
+ * More Ops for `convert_nchw_to_nhwc` option: LogSoftmax, ReduceMax, SplitV, Softmax
+ * New optimization options in `one-optimize`: `replace_non_const_fc_with_bmm`, `resolve_customop_splitv`, `fold_densify`
+ * Improved reshape elimination in `convert_nchw_to_nhwc` option.
+ * Support fusion of Channel-wise Add + Relu with TConv
+ * Support negative axis in ArgMin/Max
+ * Show errors for unrecognized options in `one-optimize`
+ * Fix shape inference for `StridedSlice`
+ * Fix FuseBatchNormWithTConvPass to support TConv with bias
+ * Deprecate `--O1` option in `circle2circle`
+ * Support gcc-11
+ * Support limited Float16 for kernels constants with dequantization to Float32
+
+ -- seongwoo <mhs4670go@naver.com> Wed, 06 Sep 2022 12:00:00 +0900
+
+one (1.20.0) bionic; urgency=medium
+
+ * luci-interpreter supports multiple kernels with PAL layer including Cortext-M
+ * luci-interpreter supports integer tensor for partly kernels
+ * luci import support constant without coping to reduce memory for luci-interpreter
+ * Reduce duplicate codes to package released modules
+ * Limited support for ONNX LSTM/RNN unrolling while importing
+ * Limited support for ARM32 cross build
+ * Support new operator: SVDF
+ * New virtual CircleVariable to support tensor with variable
+ * Support quantization of BatchMatMul Op
+ * Support mixed(UINT8 + INT16) quantization
+ * Support backward propagation of quantization parameters
+ * Upgrade default python to version 3.8
+ * Support TensorFlow 2.8.0, ONNX-TF 1.10.0, ONNX 1.11.0
+ * Upgrade circle schema to follow tflite schema v3b
+ * Refactor to mio-tflite280, mio-circle04 with version and helpers methods
+ * Use one flatbuffers 2.0 version
+ * Drop support for TensorFlow 1.x
+ * Fix for several bugs, performance enhancements, and typos
+
+ -- seongwoo <mhs4670go@naver.com> Tue, 26 Apr 2022 12:00:00 +0900
+
one (1.19.0) bionic; urgency=medium
* `circle-quantizer` supports input/output type option
diff --git a/infra/debian/compiler/docs/one-infer.1 b/infra/debian/compiler/docs/one-infer.1
new file mode 100644
index 000000000..a1bafbb12
--- /dev/null
+++ b/infra/debian/compiler/docs/one-infer.1
@@ -0,0 +1,46 @@
+.TH ONE-INFER "1" "July 2022" "one-infer version 1.21.0" "User Commands"
+.SH NAME
+one-infer \- manual page for one-infer version 1.21.0
+.SH DESCRIPTION
+usage: one\-infer [\-h] [\-v] [\-C CONFIG] [\-d DRIVER | \fB\-b\fR BACKEND] [\-\-post\-process POST_PROCESS] [\-\-] [COMMANDS FOR BACKEND DRIVER]
+.PP
+command line tool to infer model
+.SS "optional arguments:"
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+show this help message and exit
+.TP
+\fB\-v\fR, \fB\-\-version\fR
+show program's version number and exit
+.TP
+\fB\-V\fR, \fB\-\-verbose\fR
+output additional information to stdout or stderr
+.TP
+\fB\-C\fR CONFIG, \fB\-\-config\fR CONFIG
+run with configuation file
+.TP
+\fB\-d\fR DRIVER, \fB\-\-driver\fR DRIVER
+backend inference driver name to execute
+.TP
+\fB\-b\fR BACKEND, \fB\-\-backend\fR BACKEND
+backend name to use
+.TP
+\fB\-\-post\-process\fR POST_PROCESS
+post processing script to convert I/O data to standard
+format
+.SH COPYRIGHT
+Copyright \(co 2020\-2022 Samsung Electronics Co., Ltd. All Rights Reserved
+Licensed under the Apache License, Version 2.0
+https://github.com/Samsung/ONE
+.SH "SEE ALSO"
+The full documentation for
+.B one-infer
+is maintained as a Texinfo manual. If the
+.B info
+and
+.B one-infer
+programs are properly installed at your site, the command
+.IP
+.B info one-infer
+.PP
+should give you access to the complete manual.
diff --git a/infra/debian/compiler/docs/one-partition.1 b/infra/debian/compiler/docs/one-partition.1
new file mode 100644
index 000000000..5b6fe933d
--- /dev/null
+++ b/infra/debian/compiler/docs/one-partition.1
@@ -0,0 +1,56 @@
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.6.
+.TH ONE-PARTITION "1" "June 2022" "one-partition version 1.21.0" "User Commands"
+.SH NAME
+one-partition \- manual page for one-partition version 1.21.0
+.SH DESCRIPTION
+usage: one\-partition [\-h] [\-v] [\-V] [\-C CONFIG] [\-\-backends BACKENDS]
+.TP
+[\-\-default DEFAULT] [\-\-part_file PART_FILE]
+[\-\-input_file INPUT_FILE] [\-\-work_path WORK_PATH]
+.PP
+command line tool to partition circle model by multiple backends
+.SS "optional arguments:"
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+show this help message and exit
+.TP
+\fB\-v\fR, \fB\-\-version\fR
+show program's version number and exit
+.TP
+\fB\-V\fR, \fB\-\-verbose\fR
+output additional information to stdout or stderr
+.TP
+\fB\-C\fR CONFIG, \fB\-\-config\fR CONFIG
+run with configuation file
+.TP
+\fB\-\-backends\fR BACKENDS
+backends in CSV to use for partitioning
+.TP
+\fB\-\-default\fR DEFAULT
+default backend to assign
+.TP
+\fB\-\-part_file\fR PART_FILE
+partition file which provides backend to assign
+.TP
+\fB\-\-input_file\fR INPUT_FILE
+input circle model filename
+.TP
+\fB\-\-work_path\fR WORK_PATH
+work path of partition, input files exist and output
+files are produced
+.SH COPYRIGHT
+Copyright \(co 2020\-2022 Samsung Electronics Co., Ltd. All Rights Reserved
+Licensed under the Apache License, Version 2.0
+https://github.com/Samsung/ONE
+.SH "SEE ALSO"
+The full documentation for
+.B one-partition
+is maintained as a Texinfo manual. If the
+.B info
+and
+.B one-partition
+programs are properly installed at your site, the command
+.IP
+.B info one-partition
+.PP
+should give you access to the complete manual.
diff --git a/infra/debian/compiler/one-compiler.install b/infra/debian/compiler/one-compiler.install
index 805ba8677..65e46d188 100644
--- a/infra/debian/compiler/one-compiler.install
+++ b/infra/debian/compiler/one-compiler.install
@@ -1,6 +1,8 @@
# {FILES_TO_INSTALL} {DEST_DIR}
# bin
usr/bin/circle2circle usr/share/one/bin/
+usr/bin/circle-eval-diff usr/share/one/bin/
+usr/bin/circle-operator usr/share/one/bin/
usr/bin/circle-partitioner usr/share/one/bin/
usr/bin/circle-quantizer usr/share/one/bin/
usr/bin/generate_bcq_metadata.py usr/share/one/bin/
@@ -16,14 +18,21 @@ usr/bin/one-import-bcq usr/share/one/bin/
usr/bin/one-import-onnx usr/share/one/bin/
usr/bin/one-import-tf usr/share/one/bin/
usr/bin/one-import-tflite usr/share/one/bin/
+usr/bin/one-infer usr/share/one/bin/
usr/bin/one-optimize usr/share/one/bin/
usr/bin/one-pack usr/share/one/bin/
+usr/bin/one-partition usr/share/one/bin/
usr/bin/one-prepare-venv usr/share/one/bin/
usr/bin/one-profile usr/share/one/bin/
usr/bin/one-quantize usr/share/one/bin/
usr/bin/one-version usr/share/one/bin/
usr/bin/onelib/constant.py usr/share/one/bin/onelib/
usr/bin/onelib/make_cmd.py usr/share/one/bin/onelib/
+usr/bin/onelib/CfgRunner.py usr/share/one/bin/onelib/
+usr/bin/onelib/OptionBuilder.py usr/share/one/bin/onelib/
+usr/bin/onelib/TopologicalSortHelper.py usr/share/one/bin/onelib/
+usr/bin/onelib/WorkflowRunner.py usr/share/one/bin/onelib/
+usr/bin/onnx_legalizer.py usr/share/one/bin/
usr/bin/rawdata2hdf5 usr/share/one/bin/
usr/bin/record-minmax usr/share/one/bin/
usr/bin/tf2nnpkg usr/share/one/bin/
diff --git a/infra/debian/compiler/one-compiler.manpages b/infra/debian/compiler/one-compiler.manpages
index 77f2f4e46..e0284ae4e 100644
--- a/infra/debian/compiler/one-compiler.manpages
+++ b/infra/debian/compiler/one-compiler.manpages
@@ -1,5 +1,6 @@
debian/docs/one-build.1
debian/docs/one-codegen.1
+debian/docs/one-infer.1
debian/docs/one-import.1
debian/docs/one-import-bcq.1
debian/docs/one-import-onnx.1
@@ -7,6 +8,7 @@ debian/docs/one-import-tf.1
debian/docs/one-import-tflite.1
debian/docs/one-optimize.1
debian/docs/one-pack.1
+debian/docs/one-partition.1
debian/docs/one-profile.1
debian/docs/one-quantize.1
debian/docs/onecc.1
diff --git a/infra/debian/runtime/changelog b/infra/debian/runtime/changelog
index 4cf0abc30..e07c50c21 100644
--- a/infra/debian/runtime/changelog
+++ b/infra/debian/runtime/changelog
@@ -1,3 +1,18 @@
+one (1.21.0) bionic; urgency=low
+
+ * Runtime supports to run nnpackage with two models
+ * Conv2D and Depthwise Conv2D supports per-channel quantization of uint8 type.
+ * TRIX backend supports batch execution which run in parallel with multicore
+
+ -- Chunseok Lee <chunseok.lee@samsung.com> Tue, 06 Sep 2022 12:00:00 +0900
+
+one (1.20.0) bionic; urgency=low
+
+ * Introduce TRIX backend
+ * API supports new data type NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED
+
+ -- Chunseok Lee <chunseok.lee@samsung.com> Wed, 26 Apr 2022 12:00:00 +0900
+
one (1.19.0) bionic; urgency=low
* Synch up version with ONE Compiler
diff --git a/infra/debian/runtime/rules b/infra/debian/runtime/rules
index dee87a9ed..97170ee09 100755
--- a/infra/debian/runtime/rules
+++ b/infra/debian/runtime/rules
@@ -3,7 +3,7 @@ DEBVER := $(shell dpkg-parsechangelog -SVersion)
export DH_VERBOSE = 1
export _DESTDIR = debian/tmp/
export BUILD_TYPE=release
-export OPTIONS=-DBUILD_LOGGING=0 -DBUILD_TFLITE_COMPARATOR_TEST_TOOL=0 -DBUILD_NNPACKAGE_RUN=0 -DBUILD_TFLITE_RUN=0 -DBUILD_NNAPI_TEST=0 -DBUILD_RUNTIME_NNAPI_TEST=0 -DBUILD_TFLITE_BENCHMARK_MODEL=0 -DBUILD_TFLITE_VANILLA_RUN=0 -DBUILD_TENSORFLOW_LITE_2_3_0=0 -DBUILD_TENSORFLOW_LITE=0
+export OPTIONS=-DBUILD_LOGGING=0 -DBUILD_TFLITE_COMPARATOR_TEST_TOOL=0 -DBUILD_NNPACKAGE_RUN=0 -DBUILD_TFLITE_RUN=0 -DBUILD_NNAPI_TEST=0 -DBUILD_RUNTIME_NNAPI_TEST=0 -DBUILD_TFLITE_BENCHMARK_MODEL=0 -DBUILD_TFLITE_VANILLA_RUN=0 -DBUILD_TENSORFLOW_LITE_2_8_0=0 -DBUILD_TENSORFLOW_LITE=0
export DEBIAN_BUILD=1
export INSTALL_PATH=debian/tmp/usr/
%:
diff --git a/infra/docker/bionic/Dockerfile b/infra/docker/bionic/Dockerfile
index dbc22a6e8..f7ffc73fd 100644
--- a/infra/docker/bionic/Dockerfile
+++ b/infra/docker/bionic/Dockerfile
@@ -86,7 +86,7 @@ RUN echo 'deb [trusted=yes] http://download.tizen.org/tools/latest-release/Ubunt
RUN apt-get update && apt-get -qqy install gbs
RUN wget http://download.tizen.org/sdk/tizenstudio/official/binary/sdb_3.1.4_ubuntu-64.zip -O sdb.zip
RUN unzip -d tmp sdb.zip && rm sdb.zip
-RUN cp tmp/data/tools/sdb /usr/bin/. && rm -rf tmp
+RUN cp tmp/data/tools/sdb /usr/bin/. && rm -rf tmp/*
# Install java
RUN apt-get install -y --no-install-recommends openjdk-8-jdk
diff --git a/infra/docker/focal/Dockerfile b/infra/docker/focal/Dockerfile
index 6f3cd9b60..1cdeffbb7 100644
--- a/infra/docker/focal/Dockerfile
+++ b/infra/docker/focal/Dockerfile
@@ -46,7 +46,7 @@ RUN echo 'deb [trusted=yes] http://download.tizen.org/tools/latest-release/Ubunt
RUN apt-get update && apt-get -qqy install gbs
RUN wget http://download.tizen.org/sdk/tizenstudio/official/binary/sdb_4.2.19_ubuntu-64.zip -O sdb.zip
RUN unzip -d tmp sdb.zip && rm sdb.zip
-RUN cp tmp/data/tools/sdb /usr/bin/. && rm -rf tmp
+RUN cp tmp/data/tools/sdb /usr/bin/. && rm -rf tmp/*
# Clean archives (to reduce image size)
RUN apt-get clean -y
diff --git a/infra/nncc/CMakeLists.txt b/infra/nncc/CMakeLists.txt
index 2ff5a5f6a..768d7972b 100644
--- a/infra/nncc/CMakeLists.txt
+++ b/infra/nncc/CMakeLists.txt
@@ -1,4 +1,7 @@
-cmake_minimum_required(VERSION 3.1)
+# The libboost 1.74 uses IN_LIST operator, which requires the policy CMP0057, in a CMake file.
+# This policy requires ``cmake_minimum_required(VERSION 3.3)``.
+# Run "cmake --help-policy CMP0057" for policy details.
+cmake_minimum_required(VERSION 3.3)
project(nncc)
diff --git a/infra/nncc/cmake/options/options_armv7em-generic.cmake b/infra/nncc/cmake/options/options_armv7em-generic.cmake
new file mode 100644
index 000000000..d671b73f1
--- /dev/null
+++ b/infra/nncc/cmake/options/options_armv7em-generic.cmake
@@ -0,0 +1,3 @@
+#
+# armv7em generic cmake options
+#
diff --git a/infra/nnfw/CMakeLists.txt b/infra/nnfw/CMakeLists.txt
index 897a16fbf..2a27eee59 100644
--- a/infra/nnfw/CMakeLists.txt
+++ b/infra/nnfw/CMakeLists.txt
@@ -55,6 +55,12 @@ macro(nnas_find_package PREFIX)
)
endmacro(nnas_find_package)
+# C++14 feature requires 5 or later
+# Using std::unordered_map shows build fail under 6.2
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.2)
+ message(FATAL "Runtime build requires GNU Compiler version 6.2 or later.")
+endif()
+
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_EXTENSIONS OFF)
diff --git a/infra/nnfw/cmake/ApplyCompileFlags.cmake b/infra/nnfw/cmake/ApplyCompileFlags.cmake
index b042b0c42..b1c7ff568 100644
--- a/infra/nnfw/cmake/ApplyCompileFlags.cmake
+++ b/infra/nnfw/cmake/ApplyCompileFlags.cmake
@@ -31,3 +31,13 @@ endforeach()
foreach(FLAG ${FLAGS_CXXONLY})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
endforeach()
+
+# lib pthread as a variable (finding pthread build option must be disabled on android)
+# Define here to use on external lib build
+set(LIB_PTHREAD lib_pthread)
+add_library(${LIB_PTHREAD} INTERFACE)
+if(NOT TARGET_OS STREQUAL "android")
+ # Get compile option (ex. "-pthread" on linux GNU build tool)
+ find_package(Threads)
+ target_link_libraries(${LIB_PTHREAD} INTERFACE Threads::Threads)
+endif()
diff --git a/infra/nnfw/cmake/CfgOptionFlags.cmake b/infra/nnfw/cmake/CfgOptionFlags.cmake
index 5371120ad..440f1859a 100644
--- a/infra/nnfw/cmake/CfgOptionFlags.cmake
+++ b/infra/nnfw/cmake/CfgOptionFlags.cmake
@@ -31,6 +31,8 @@ option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" ON)
option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" ON)
option(INSTALL_TEST_SCRIPTS "Install test scripts" ON)
option(BUILD_GPU_CL "Build gpu_cl backend" OFF)
+option(BUILD_NPUD "Build NPU daemon" ON)
+option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" ON)
#
# Default build configuration for contrib
#
@@ -72,9 +74,10 @@ option(DOWNLOAD_OOURAFFT "Download Ooura FFT source" ON)
option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" ON)
option(BUILD_BOOST "Build boost source" OFF)
option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" ON)
-option(BUILD_TENSORFLOW_LITE_2_3_0 "Build TensorFlow Lite 2.3.0 from the downloaded source" OFF)
+option(BUILD_TENSORFLOW_LITE_2_8_0 "Build TensorFlow Lite 2.8.0 from the downloaded source" OFF)
option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" OFF)
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" ON)
+option(DEBUG_ARMCOMPUTE "Build ARM Compute as debug type" OFF)
option(BUILD_RUY "Build ruy library from the downloaded source" ON)
option(BUILD_CPUINFO "Build cpuinfo library from the downloaded source" ON)
option(PROFILE_RUY "Enable ruy library profiling" OFF)
diff --git a/infra/nnfw/cmake/buildtool/config/config_aarch64-android.cmake b/infra/nnfw/cmake/buildtool/config/config_aarch64-android.cmake
index e0c81dee7..fb63b3c47 100644
--- a/infra/nnfw/cmake/buildtool/config/config_aarch64-android.cmake
+++ b/infra/nnfw/cmake/buildtool/config/config_aarch64-android.cmake
@@ -1,8 +1,5 @@
include("cmake/buildtool/config/config_linux.cmake")
-# On Android, pthread is contained in bionic(libc)
-set(LIB_PTHREAD "")
-
# SIMD for aarch64
set(FLAGS_COMMON ${FLAGS_COMMON}
"-ftree-vectorize"
diff --git a/infra/nnfw/cmake/buildtool/config/config_linux.cmake b/infra/nnfw/cmake/buildtool/config/config_linux.cmake
index 86dd0f217..01b47ef4a 100644
--- a/infra/nnfw/cmake/buildtool/config/config_linux.cmake
+++ b/infra/nnfw/cmake/buildtool/config/config_linux.cmake
@@ -2,20 +2,11 @@
# linux common compile options
#
-# remove warning from arm cl
+# Remove warning: ignoring attributes on template argument (ACL, Eigen, etc)
# https://github.com/ARM-software/ComputeLibrary/issues/330
-set(GCC_VERSION_DISABLE_WARNING 6.0)
-if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER GCC_VERSION_DISABLE_WARNING)
- message(STATUS "GCC version higher than ${GCC_VERSION_DISABLE_WARNING}")
- set(FLAGS_CXXONLY ${FLAGS_CXXONLY}
- "-Wno-ignored-attributes"
- )
-endif()
+set(FLAGS_CXXONLY ${FLAGS_CXXONLY} "-Wno-ignored-attributes")
# Disable annoying ABI compatibility warning.
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
list(APPEND FLAGS_CXXONLY "-Wno-psabi")
endif()
-
-# lib pthread as a variable (pthread must be disabled on android)
-set(LIB_PTHREAD pthread)
diff --git a/infra/nnfw/cmake/buildtool/config/config_x86_64-darwin.cmake b/infra/nnfw/cmake/buildtool/config/config_x86_64-darwin.cmake
index dbd45fc03..52d6c6b2b 100644
--- a/infra/nnfw/cmake/buildtool/config/config_x86_64-darwin.cmake
+++ b/infra/nnfw/cmake/buildtool/config/config_x86_64-darwin.cmake
@@ -7,6 +7,3 @@ message(STATUS "Building for x86-64 Darwin")
set(FLAGS_COMMON ${FLAGS_COMMON}
"-msse4"
)
-
-# lib pthread as a variable (pthread must be disabled on android)
-set(LIB_PTHREAD pthread)
diff --git a/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-linux.cmake b/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-linux.cmake
index 3356aa72d..07b26a937 100644
--- a/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-linux.cmake
+++ b/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-linux.cmake
@@ -21,12 +21,6 @@ endif()
set(CMAKE_SYSROOT ${ROOTFS_DIR})
set(CMAKE_FIND_ROOT_PATH ${ROOTFS_DIR})
-set(CMAKE_SHARED_LINKER_FLAGS
- "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
-set(CMAKE_EXE_LINKER_FLAGS
- "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
# search for programs in the build host directories
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
diff --git a/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-tizen.cmake b/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-tizen.cmake
index 4d5d7ac56..cab7325dd 100644
--- a/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-tizen.cmake
+++ b/infra/nnfw/cmake/buildtool/cross/toolchain_aarch64-tizen.cmake
@@ -23,12 +23,6 @@ endif()
set(CMAKE_SYSROOT ${ROOTFS_DIR})
set(CMAKE_FIND_ROOT_PATH ${ROOTFS_DIR})
-set(CMAKE_SHARED_LINKER_FLAGS
- "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
-set(CMAKE_EXE_LINKER_FLAGS
- "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
# search for programs in the build host directories
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
diff --git a/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-linux.cmake b/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-linux.cmake
index 8f2cb6735..c69259f85 100644
--- a/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-linux.cmake
+++ b/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-linux.cmake
@@ -21,12 +21,6 @@ endif()
set(CMAKE_SYSROOT ${ROOTFS_DIR})
set(CMAKE_FIND_ROOT_PATH ${ROOTFS_DIR})
-set(CMAKE_SHARED_LINKER_FLAGS
- "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
-set(CMAKE_EXE_LINKER_FLAGS
- "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
# search for programs in the build host directories
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
diff --git a/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-tizen.cmake b/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-tizen.cmake
index 72513cdc1..181415df2 100644
--- a/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-tizen.cmake
+++ b/infra/nnfw/cmake/buildtool/cross/toolchain_armv7l-tizen.cmake
@@ -23,12 +23,6 @@ endif()
set(CMAKE_SYSROOT ${ROOTFS_DIR})
set(CMAKE_FIND_ROOT_PATH ${ROOTFS_DIR})
-set(CMAKE_SHARED_LINKER_FLAGS
- "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
-set(CMAKE_EXE_LINKER_FLAGS
- "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
- CACHE INTERNAL "" FORCE)
# search for programs in the build host directories
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
@@ -46,10 +40,6 @@ add_compile_options(-mfpu=neon-vfpv4)
add_compile_options(-mfloat-abi=softfp)
add_compile_options(--sysroot=${ROOTFS_DIR})
-set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}")
-
-set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}")
-
include_directories(SYSTEM ${ROOTFS_DIR}/usr/lib/gcc/${TIZEN_TOOLCHAIN}/include/c++/)
include_directories(SYSTEM ${ROOTFS_DIR}/usr/lib/gcc/${TIZEN_TOOLCHAIN}/include/c++/armv7l-tizen-linux-gnueabi)
add_compile_options(-Wno-deprecated-declarations) # compile-time option
diff --git a/infra/nnfw/cmake/options/options_aarch64-android.cmake b/infra/nnfw/cmake/options/options_aarch64-android.cmake
index 9332f5299..e95ccca63 100644
--- a/infra/nnfw/cmake/options/options_aarch64-android.cmake
+++ b/infra/nnfw/cmake/options/options_aarch64-android.cmake
@@ -10,3 +10,5 @@ option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
option(DOWNLOAD_BOOST "Download boost source" ON)
option(BUILD_BOOST "Build boost source" ON)
option(BUILD_LOGGING "Build logging runtime" OFF)
+# Do not support npud
+option(BUILD_NPUD "Build NPU daemon" OFF)
diff --git a/infra/nnfw/cmake/options/options_armv7l-tizen.cmake b/infra/nnfw/cmake/options/options_armv7l-tizen.cmake
index eab3b0a92..9b487d93c 100644
--- a/infra/nnfw/cmake/options/options_armv7l-tizen.cmake
+++ b/infra/nnfw/cmake/options/options_armv7l-tizen.cmake
@@ -9,6 +9,7 @@ option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
option(BUILD_LOGGING "Build logging runtime" OFF)
option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
+option(ENVVAR_NPUD_CONFIG "Use environment variable for npud configuration" OFF)
option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" ON)
option(DOWNLOAD_TENSORFLOW_GPU "Download Tensorflow GPU delegate source" ON)
diff --git a/infra/nnfw/cmake/options/options_x86_64-tizen.cmake b/infra/nnfw/cmake/options/options_x86_64-tizen.cmake
index 31b7fd6fb..eea37224d 100644
--- a/infra/nnfw/cmake/options/options_x86_64-tizen.cmake
+++ b/infra/nnfw/cmake/options/options_x86_64-tizen.cmake
@@ -2,6 +2,7 @@
# x86_64 linux cmake options
#
option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
+option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
diff --git a/infra/nnfw/cmake/packages/ARMComputeConfig.cmake b/infra/nnfw/cmake/packages/ARMComputeConfig.cmake
index 6ae7dea34..f6a4efd96 100644
--- a/infra/nnfw/cmake/packages/ARMComputeConfig.cmake
+++ b/infra/nnfw/cmake/packages/ARMComputeConfig.cmake
@@ -90,11 +90,11 @@ function(_ARMCompute_Build ARMComputeInstall_DIR)
return()
endif(NOT SCONS_PATH)
- if(CMAKE_BUILD_TYPE)
- string(TOLOWER "${CMAKE_BUILD_TYPE}" SCON_BUILD_TYPE)
- else(CMAKE_BUILD_TYPE)
+ if(DEBUG_ARMCOMPUTE)
+ set(SCON_BUILD_TYPE "debug")
+ else(DEBUG_ARMCOMPUTE)
set(SCON_BUILD_TYPE "release")
- endif(CMAKE_BUILD_TYPE)
+ endif(DEBUG_ARMCOMPUTE)
#### Architecture-specific configurations
diff --git a/infra/nnfw/cmake/packages/CpuInfoConfig.cmake b/infra/nnfw/cmake/packages/CpuInfoConfig.cmake
index 878026d9a..dddec8988 100644
--- a/infra/nnfw/cmake/packages/CpuInfoConfig.cmake
+++ b/infra/nnfw/cmake/packages/CpuInfoConfig.cmake
@@ -16,14 +16,18 @@ function(_CpuInfo_Build)
nnas_include(ExternalProjectTools)
- set(CPUINFO_BUILD_TOOLS OFF CACHE BOOL "Build command-line tools")
- set(CPUINFO_BUILD_BENCHMARKS OFF CACHE BOOL "Build cpuinfo unit tests")
- set(CPUINFO_BUILD_UNIT_TESTS OFF CACHE BOOL "Build cpuinfo mock tests")
- set(CPUINFO_BUILD_MOCK_TESTS OFF CACHE BOOL "Build cpuinfo micro-benchmarks")
+ # Set build option
+ # - Static (position independent)
+ # - No logging
+ # - Library only (CPUINFO_RUNTIME_TYPE is not used)
+ set(CPUINFO_LIBRARY_TYPE "static" CACHE STRING "")
+ set(CPUINFO_LOG_LEVEL "none" CACHE STRING "")
+ set(CPUINFO_BUILD_TOOLS OFF CACHE BOOL "")
+ set(CPUINFO_BUILD_BENCHMARKS OFF CACHE BOOL "")
+ set(CPUINFO_BUILD_UNIT_TESTS OFF CACHE BOOL "")
+ set(CPUINFO_BUILD_MOCK_TESTS OFF CACHE BOOL "")
add_extdirectory("${CpuInfoSource_DIR}" cpuinfo EXCLUDE_FROM_ALL)
set_target_properties(cpuinfo PROPERTIES POSITION_INDEPENDENT_CODE ON)
- # Suppress warnings generated by clog
- set_target_properties(clog PROPERTIES COMPILE_FLAGS "-Wno-unused-result")
set(CpuInfoSource_DIR ${CpuInfoSource_DIR} PARENT_SCOPE)
set(CpuInfo_FOUND TRUE PARENT_SCOPE)
endfunction(_CpuInfo_Build)
diff --git a/infra/nnfw/cmake/packages/GLib2.0Config.cmake b/infra/nnfw/cmake/packages/GLib2.0Config.cmake
new file mode 100644
index 000000000..d4c6bf241
--- /dev/null
+++ b/infra/nnfw/cmake/packages/GLib2.0Config.cmake
@@ -0,0 +1,41 @@
+function(_GLIB_2_0_import)
+ find_library(GLIB_LIBRARIES
+ NAMES glib-2.0)
+
+ get_filename_component(GLIB_LIBRARY_DIR ${GLIB_LIBRARIES} DIRECTORY)
+ find_path(GLIBCONFIG_INCLUDE_DIR
+ NAMES glibconfig.h
+ PATHS ${GLIB_LIBRARY_DIR}
+ PATH_SUFFIXES glib-2.0/include
+ NO_CMAKE_FIND_ROOT_PATH)
+
+ find_path(GLIB_INCLUDE_DIR
+ NAMES glib.h
+ PATH_SUFFIXES glib-2.0)
+
+ set(GLIB_FOUND TRUE)
+
+ if(NOT GLIB_LIBRARIES)
+ set(GLIB_FOUND FALSE)
+ endif(NOT GLIB_LIBRARIES)
+
+ if(NOT GLIBCONFIG_INCLUDE_DIR)
+ set(GLIB_FOUND FALSE)
+ endif(NOT GLIBCONFIG_INCLUDE_DIR)
+
+ if(NOT GLIB_INCLUDE_DIR)
+ set(GLIB_FOUND FALSE)
+ endif(NOT GLIB_INCLUDE_DIR)
+
+ set(GLIB_INCLUDE_DIRS ${GLIB_INCLUDE_DIR} ${GLIBCONFIG_INCLUDE_DIR})
+
+ if(NOT GLIB_FOUND)
+ message(STATUS "Failed to find GLib 2.0")
+ endif(NOT GLIB_FOUND)
+
+ set(GLIB2.0_FOUND ${GLIB_FOUND} PARENT_SCOPE)
+ set(GLIB2.0_INCLUDE_DIRS ${GLIB_INCLUDE_DIRS} PARENT_SCOPE)
+ set(GLIB2.0_LIBRARIES ${GLIB_LIBRARIES} PARENT_SCOPE)
+endfunction(_GLIB_2_0_import)
+
+_GLIB_2_0_import()
diff --git a/infra/nnfw/cmake/packages/Ruy/CMakeLists.txt b/infra/nnfw/cmake/packages/Ruy/CMakeLists.txt
index 9140a17a7..a1c4656e3 100644
--- a/infra/nnfw/cmake/packages/Ruy/CMakeLists.txt
+++ b/infra/nnfw/cmake/packages/Ruy/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(RUY_BASE ${RuySource_DIR}/ruy)
+set(RUY_BASE ${TensorFlowRuySource_DIR}/ruy)
#
# Ruy library
@@ -14,7 +14,6 @@ list(REMOVE_ITEM RUY_SRCS "${RUY_BASE}/example_advanced.cc")
list(REMOVE_ITEM RUY_SRCS "${RUY_BASE}/tune_tool.cc")
list(REMOVE_ITEM RUY_SRCS "${RUY_BASE}/pmu.cc")
list(REMOVE_ITEM RUY_SRCS "${RUY_BASE}/create_trmul_params.cc")
-list(REMOVE_ITEM RUY_SRCS "${RUY_BASE}/prepare_packed_matrices.cc")
list(APPEND RUY_INSTRUMENTATION_SRCS "${RUY_BASE}/profiler/instrumentation.cc")
@@ -23,7 +22,7 @@ if(PROFILE_RUY)
list(APPEND RUY_PROFILER_SRCS "${RUY_BASE}/profiler/treeview.cc")
endif(PROFILE_RUY)
-list(APPEND RUY_INCLUDES "${RuySource_DIR}")
+list(APPEND RUY_INCLUDES "${TensorFlowRuySource_DIR}")
add_library(ruy STATIC ${RUY_SRCS})
target_include_directories(ruy SYSTEM PUBLIC ${RUY_INCLUDES})
diff --git a/infra/nnfw/cmake/packages/RuyConfig.cmake b/infra/nnfw/cmake/packages/RuyConfig.cmake
index 4e7cc24ac..6f5f4b71e 100644
--- a/infra/nnfw/cmake/packages/RuyConfig.cmake
+++ b/infra/nnfw/cmake/packages/RuyConfig.cmake
@@ -5,14 +5,14 @@ function(_Ruy_Build)
return()
endif(TARGET ruy)
- nnas_find_package(RuySource QUIET)
+ nnas_find_package(TensorFlowRuySource EXACT 2.8 QUIET)
nnfw_find_package(CpuInfo QUIET)
- if(NOT RuySource_FOUND)
+ if(NOT TensorFlowRuySource_FOUND)
message(STATUS "RUY: Source not found")
set(Ruy_FOUND FALSE PARENT_SCOPE)
return()
- endif(NOT RuySource_FOUND)
+ endif(NOT TensorFlowRuySource_FOUND)
if (NOT CpuInfo_FOUND)
message(STATUS "RUY: CPUINFO not found")
@@ -20,6 +20,17 @@ function(_Ruy_Build)
return()
endif(NOT CpuInfo_FOUND)
+ # Ruy's cmake requires cmake >= 3.14
+ # Once cmake >= 3.14 is available, enable the commented-out code below
+ #if(PROFILE_RUY)
+ # # Will be used on ruy build
+ # set(RUY_PROFILER ON)
+ #endif(PROFILE_RUY)
+ #add_extdirectory("${RuySource_DIR}" Ruy)
+ #
+ ## Ignore warning from ruy
+ #target_compile_options(ruy INTERFACE -Wno-comment)
+
add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/Ruy" ruy)
set(Ruy_FOUND TRUE PARENT_SCOPE)
endfunction(_Ruy_Build)
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLite/CMakeLists.txt b/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLite/CMakeLists.txt
index 9a7b240e9..f872b88cd 100644
--- a/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLite/CMakeLists.txt
+++ b/infra/nnfw/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLite/CMakeLists.txt
@@ -52,6 +52,12 @@ target_compile_definitions(tensorflow-lite PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FA
set_property(TARGET tensorflow-lite PROPERTY POSITION_INDEPENDENT_CODE ON)
target_link_libraries(tensorflow-lite eigen-tf-1.13.1 flatbuffers::flatbuffers ${LIB_PTHREAD} dl)
+# Define TF_LITE_DISABLE_X86_NEON for debug build
+# If we upgrade NEON2SSE version, we can remove below line
+if(NEON2SSESource_FOUND)
+ target_compile_definitions(tensorflow-lite PRIVATE $<$<CONFIG:Debug>:TF_LITE_DISABLE_X86_NEON>)
+endif(NEON2SSESource_FOUND)
+
if(ANDROID)
target_link_libraries(tensorflow-lite log)
target_include_directories(tensorflow-lite PUBLIC "${NDK_DIR}/..")
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLite/CMakeLists.txt b/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLite/CMakeLists.txt
deleted file mode 100644
index afee6e1cc..000000000
--- a/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLite/CMakeLists.txt
+++ /dev/null
@@ -1,96 +0,0 @@
-# Reference: https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/lite/tools/make/Makefile
-#
-# Tensorflow Lite library 2.3.0
-#
-set(TENSORFLOW_LITE_BASE ${TensorFlowSource_DIR}/tensorflow/lite)
-
-file(GLOB TFLITE_CORE_SRCS "${TENSORFLOW_LITE_BASE}/*.c"
- "${TENSORFLOW_LITE_BASE}/*.cc"
- "${TENSORFLOW_LITE_BASE}/core/*.cc")
-
-file(GLOB_RECURSE TFLITE_KERNEL_SRCS "${TENSORFLOW_LITE_BASE}/kernels/*.cc")
-
-file(GLOB TFLITE_LIB_SRCS "${TENSORFLOW_LITE_BASE}/c/*.c" "${TENSORFLOW_LITE_BASE}/c/*.cc")
-
-file(GLOB TFLITE_API_SRCS "${TENSORFLOW_LITE_BASE}/core/api/*.c"
- "${TENSORFLOW_LITE_BASE}/core/api/*.cc")
-
-list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/memory_info.cc")
-list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/time.cc")
-
-file(GLOB TFLITE_EXPERIMENTAL_SRCS "${TENSORFLOW_LITE_BASE}/experimental/resource/*.cc")
-
-file(GLOB TFLITE_SPARSITY_SRCS "${TENSORFLOW_LITE_BASE}/tools/optimize/sparsity/*.cc")
-
-list(APPEND TFLITE_SRCS ${TFLITE_CORE_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_KERNEL_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_LIB_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_API_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_PROFILING_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_EXPERIMENTAL_SRCS})
-list(APPEND TFLITE_SRCS ${TFLITE_SPARSITY_SRCS})
-
-# externals
-list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg.c")
-list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg2d.c")
-
-# Build with mmap? true
-# caution: v2.3.0's Makefile has wrong code on this part. This is fixed on master branch.
-set(BUILD_WITH_MMAP TRUE)
-if(${BUILD_WITH_MMAP})
- list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation_disabled.cc")
-else()
- list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation.cc")
-endif()
-
-# Build with nnapi? true
-# caution: this nnapi delegate comes from tflite, not ours.
-set(BUILD_WITH_NNAPI TRUE)
-if(${BUILD_WITH_NNAPI})
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/quant_lstm_sup.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_util.cc")
-else()
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate_disabled.cc")
- list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation_disabled.cc")
-endif()
-
-# ios: we don't support ios
-list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_ios.cc")
-
-# android
-if(NOT ANDROID)
- list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_android.cc")
-endif()
-
-# exclude some source files
-file(GLOB_RECURSE TFLITE_EXCLS "${TENSORFLOW_LITE_BASE}/*test*.cc"
- "${TENSORFLOW_LITE_BASE}/*benchmark*.cc"
- "${TENSORFLOW_LITE_BASE}/*example*.cc"
- "${TENSORFLOW_LITE_BASE}/*tool*.cc")
-list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_EXCLS})
-
-# include headers
-list(APPEND TFLITE_INCLUDES "${TensorFlowSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${TensorFlowGEMMLowpSource_DIR}")
-list(APPEND TFLITE_INCLUDES "${Fp16Source_DIR}/include")
-
-if(NEON2SSESource_FOUND)
- list(APPEND TFLITE_INCLUDES "${NEON2SSESource_DIR}")
-endif(NEON2SSESource_FOUND)
-
-add_library(tensorflow-lite-2.3.0 STATIC ${TFLITE_SRCS})
-target_include_directories(tensorflow-lite-2.3.0 SYSTEM PUBLIC ${TFLITE_INCLUDES})
-target_include_directories(tensorflow-lite-2.3.0 PRIVATE ${CpuInfoSource_DIR})
-target_compile_definitions(tensorflow-lite-2.3.0 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK -DTFLITE_WITH_RUY -DTFLITE_WITH_RUY_GEMV -DRUY_HAVE_CPUINFO")
-set_property(TARGET tensorflow-lite-2.3.0 PROPERTY POSITION_INDEPENDENT_CODE ON)
-target_link_libraries(tensorflow-lite-2.3.0 eigen flatbuffers::flatbuffers ruy abseil farmhash ${LIB_PTHREAD} dl)
-if(NOT ANDROID AND ${BUILD_WITH_NNAPI})
- target_link_libraries(tensorflow-lite-2.3.0 rt)
-endif()
-
-if(ANDROID)
- target_link_libraries(tensorflow-lite-2.3.0 log)
- target_include_directories(tensorflow-lite-2.3.0 PUBLIC "${NDK_DIR}/..")
-endif()
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLiteConfig.cmake b/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLiteConfig.cmake
deleted file mode 100644
index c81958cf4..000000000
--- a/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLiteConfig.cmake
+++ /dev/null
@@ -1,44 +0,0 @@
-if(BUILD_TENSORFLOW_LITE_2_3_0)
- macro(return_unless VAR)
- if(NOT ${VAR})
- message("TFLiteVanillaRun: ${VAR} NOT TRUE")
- set(TensorFlowLite_2_3_0_FOUND FALSE PARENT_SCOPE)
- return()
- endif(NOT ${VAR})
- endmacro(return_unless)
-
- nnas_include(ExternalSourceTools)
- nnas_include(OptionTools)
-
- nnas_find_package(TensorFlowSource EXACT 2.3.0 QUIET)
- return_unless(TensorFlowSource_FOUND)
-
- # Below urls come from https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/tensorflow/workspace.bzl
- nnas_find_package(AbseilSource QUIET)
- return_unless(AbseilSource_FOUND)
- nnfw_find_package(Eigen QUIET)
- return_unless(Eigen_FOUND)
- nnas_find_package(Farmhash QUIET)
- return_unless(Farmhash_FOUND)
- nnfw_find_package(FlatBuffers QUIET)
- return_unless(FlatBuffers_FOUND)
- nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.3.0 QUIET)
- return_unless(TensorFlowGEMMLowpSource_FOUND)
- nnas_find_package(OouraFFTSource QUIET)
- return_unless(OouraFFTSource_FOUND)
- nnfw_find_package(Ruy QUIET)
- return_unless(Ruy_FOUND)
-
- # TensorFlow Lite requires FP16 library's header only
- nnas_find_package(Fp16Source QUIET)
- return_unless(Fp16Source_FOUND)
-
- # Optional packages
- nnas_find_package(NEON2SSESource QUIET)
-
- nnas_include(ExternalProjectTools)
- add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLite" tflite-2.3.0)
-
- set(TensorFlowLite_2_3_0_FOUND TRUE)
- return()
-endif()
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLiteConfigVersion.cmake b/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLiteConfigVersion.cmake
deleted file mode 100644
index 08e637421..000000000
--- a/infra/nnfw/cmake/packages/TensorFlowLite-2.3.0/TensorFlowLiteConfigVersion.cmake
+++ /dev/null
@@ -1,9 +0,0 @@
-set(PACKAGE_VERSION "2.3.0")
-set(PACKAGE_VERSION_EXACT FALSE)
-set(PACKAGE_VERSION_COMPATIBLE FALSE)
-set(PACKAGE_VERSION_UNSUITABLE TRUE)
-
-if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
- set(PACKAGE_VERSION_EXACT TRUE)
- set(PACKAGE_VERSION_UNSUITABLE FALSE)
-endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLite/CMakeLists.txt b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLite/CMakeLists.txt
new file mode 100644
index 000000000..d7e1d0666
--- /dev/null
+++ b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLite/CMakeLists.txt
@@ -0,0 +1,121 @@
+# Reference: https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/lite/tools/make/Makefile
+#
+# Tensorflow Lite library 2.8.0
+#
+set(TENSORFLOW_LITE_BASE ${TensorFlowSource_DIR}/tensorflow/lite)
+
+file(GLOB TFLITE_CORE_SRCS "${TENSORFLOW_LITE_BASE}/*.c"
+ "${TENSORFLOW_LITE_BASE}/*.cc"
+ "${TENSORFLOW_LITE_BASE}/core/*.cc")
+
+file(GLOB_RECURSE TFLITE_KERNEL_SRCS "${TENSORFLOW_LITE_BASE}/kernels/*.cc")
+
+file(GLOB TFLITE_LIB_SRCS "${TENSORFLOW_LITE_BASE}/c/*.c" "${TENSORFLOW_LITE_BASE}/c/*.cc")
+
+file(GLOB TFLITE_API_SRCS "${TENSORFLOW_LITE_BASE}/core/api/*.c"
+ "${TENSORFLOW_LITE_BASE}/core/api/*.cc")
+
+list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/memory_info.cc")
+list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/time.cc")
+list(APPEND TFLITE_PROFILING_SRCS "${TENSORFLOW_LITE_BASE}/profiling/platform_profiler.cc")
+
+file(GLOB TFLITE_EXPERIMENTAL_SRCS "${TENSORFLOW_LITE_BASE}/experimental/resource/*.cc")
+
+file(GLOB TFLITE_SCHEMA_UTIL_SRCS "${TENSORFLOW_LITE_BASE}/schema/*.cc")
+
+# Moved to kernels/internal/utils
+#file(GLOB TFLITE_SPARSITY_SRCS "${TENSORFLOW_LITE_BASE}/tools/optimize/sparsity/*.cc")
+
+list(APPEND TFLITE_SRCS ${TFLITE_CORE_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_KERNEL_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_LIB_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_API_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_PROFILING_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_EXPERIMENTAL_SRCS})
+#list(APPEND TFLITE_SRCS ${TFLITE_SPARSITY_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_SCHEMA_UTIL_SRCS})
+
+# externals
+list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg.c")
+list(APPEND TFLITE_SRCS "${OouraFFTSource_DIR}/fftsg2d.c")
+
+# Build with mmap? true
+# caution: v2.3.0's Makefile has wrong code on this part. This is fixed on master branch.
+set(BUILD_WITH_MMAP TRUE)
+if(${BUILD_WITH_MMAP})
+ list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation_disabled.cc")
+else()
+ list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/mmap_allocation.cc")
+endif()
+
+# Build with nnapi? true
+# caution: this nnapi delegate comes from tflite, not ours.
+set(BUILD_WITH_NNAPI TRUE)
+if(${BUILD_WITH_NNAPI})
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate.cc")
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/quant_lstm_sup.cc")
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/utils.cc")
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/serialization.cc")
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation.cc")
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_util.cc")
+else()
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/delegates/nnapi/nnapi_delegate_disabled.cc")
+ list(APPEND TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/nnapi/nnapi_implementation_disabled.cc")
+endif()
+
+# ios: we don't support ios
+list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_ios.cc")
+
+# android
+if(NOT ANDROID)
+ list(REMOVE_ITEM TFLITE_SRCS "${TENSORFLOW_LITE_BASE}/minimal_logging_android.cc")
+endif()
+
+# exclude some source files
+file(GLOB_RECURSE TFLITE_EXCLS "${TENSORFLOW_LITE_BASE}/*test*.cc"
+ "${TENSORFLOW_LITE_BASE}/*benchmark*.cc"
+ "${TENSORFLOW_LITE_BASE}/*example*.cc"
+ "${TENSORFLOW_LITE_BASE}/*tool*.cc")
+list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_EXCLS})
+
+# exclude some kernels (requires python3-dev package)
+# TODO Enable these kernels by installing package on build system
+file(GLOB_RECURSE TFLITE_KERNEL_EXCLS "${TENSORFLOW_LITE_BASE}/kernels/variable_ops_wrapper.cc"
+ "${TENSORFLOW_LITE_BASE}/kernels/gradient/*.cc"
+ "${TENSORFLOW_LITE_BASE}/kernels/perception/*.cc")
+list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_KERNEL_EXCLS})
+
+# exclude kernel shim
+file(GLOB_RECURSE TFLITE_SHIM_EXCLS "${TENSORFLOW_LITE_BASE}/kernels/shim/*.cc")
+list(REMOVE_ITEM TFLITE_SRCS ${TFLITE_SHIM_EXCLS})
+
+# include headers
+list(APPEND TFLITE_INCLUDES "${TensorFlowSource_DIR}")
+list(APPEND TFLITE_INCLUDES "${TensorFlowGEMMLowpSource_DIR}")
+list(APPEND TFLITE_INCLUDES "${Fp16Source_DIR}/include")
+#list(APPEND TFLITE_INCLUDES "${Pybind11Source_DIR}/include")
+
+if(NEON2SSESource_FOUND)
+ list(APPEND TFLITE_INCLUDES "${NEON2SSESource_DIR}")
+endif(NEON2SSESource_FOUND)
+
+add_library(tensorflow-lite-2.8.0 STATIC ${TFLITE_SRCS})
+target_include_directories(tensorflow-lite-2.8.0 SYSTEM PUBLIC ${TFLITE_INCLUDES})
+target_include_directories(tensorflow-lite-2.8.0 PRIVATE ${CpuInfoSource_DIR})
+target_compile_definitions(tensorflow-lite-2.8.0 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK -DTFLITE_WITH_RUY -DTFLITE_WITH_RUY_GEMV -DRUY_HAVE_CPUINFO")
+set_property(TARGET tensorflow-lite-2.8.0 PROPERTY POSITION_INDEPENDENT_CODE ON)
+target_link_libraries(tensorflow-lite-2.8.0 eigen flatbuffers::flatbuffers ruy abseil farmhash ${LIB_PTHREAD} dl)
+if(NOT ANDROID AND ${BUILD_WITH_NNAPI})
+ target_link_libraries(tensorflow-lite-2.8.0 rt)
+endif()
+
+# Define TF_LITE_DISABLE_X86_NEON for debug build
+# If we upgrade NEON2SSE version, we can remove below line
+if(NEON2SSESource_FOUND)
+ target_compile_definitions(tensorflow-lite-2.8.0 PRIVATE $<$<CONFIG:Debug>:TF_LITE_DISABLE_X86_NEON>)
+endif(NEON2SSESource_FOUND)
+
+if(ANDROID)
+ target_link_libraries(tensorflow-lite-2.8.0 log)
+ target_include_directories(tensorflow-lite-2.8.0 PUBLIC "${NDK_DIR}/..")
+endif()
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfig.cmake b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfig.cmake
new file mode 100644
index 000000000..1c8061812
--- /dev/null
+++ b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfig.cmake
@@ -0,0 +1,50 @@
+if(BUILD_TENSORFLOW_LITE_2_8_0)
+ macro(return_unless VAR)
+ if(NOT ${VAR})
+ message("TFLite 2.8: ${VAR} NOT TRUE")
+ set(TensorFlowLite_2_8_0_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT ${VAR})
+ endmacro(return_unless)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ nnas_find_package(TensorFlowSource EXACT 2.8.0 QUIET)
+ return_unless(TensorFlowSource_FOUND)
+
+ # Below urls come from https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/tensorflow/workspace.bzl
+ nnas_find_package(AbseilSource QUIET)
+ return_unless(AbseilSource_FOUND)
+ nnfw_find_package(Eigen QUIET)
+ return_unless(Eigen_FOUND)
+ nnas_find_package(Farmhash QUIET)
+ return_unless(Farmhash_FOUND)
+ nnfw_find_package(FlatBuffers QUIET)
+ return_unless(FlatBuffers_FOUND)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.8.0 QUIET)
+ return_unless(TensorFlowGEMMLowpSource_FOUND)
+ nnas_find_package(OouraFFTSource QUIET)
+ return_unless(OouraFFTSource_FOUND)
+ nnfw_find_package(Ruy QUIET)
+ return_unless(Ruy_FOUND)
+
+ # TensorFlow Lite requires FP16 library's header only
+ nnas_find_package(Fp16Source QUIET)
+ return_unless(Fp16Source_FOUND)
+
+ # TensorFlow Lite requires Pybind11 library's header only
+ # But Pybind11 requires python3-dev package
+ # TODO Enable below by installing package on build system
+ #nnas_find_package(Pybind11Source QUIET)
+ #return_unless(Pybind11Source_FOUND)
+
+ # Optional packages
+ nnas_find_package(NEON2SSESource QUIET)
+
+ nnas_include(ExternalProjectTools)
+ add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLite" tflite-2.8.0)
+
+ set(TensorFlowLite_2_8_0_FOUND TRUE)
+ return()
+endif()
diff --git a/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfigVersion.cmake b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfigVersion.cmake
new file mode 100644
index 000000000..cd49d7b72
--- /dev/null
+++ b/infra/nnfw/cmake/packages/TensorFlowLite-2.8.0/TensorFlowLiteConfigVersion.cmake
@@ -0,0 +1,9 @@
+set(PACKAGE_VERSION "2.8.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/nnfw/config/gbs.conf b/infra/nnfw/config/gbs.conf
index 1150a5fc8..2b5994a1b 100644
--- a/infra/nnfw/config/gbs.conf
+++ b/infra/nnfw/config/gbs.conf
@@ -3,20 +3,11 @@
profile = profile.tizen
[profile.tizen]
-user=obs_viewer
-obs = obs.tizen
-repos = repo.tizen_one,repo.tizen_base,repo.tizen_mobile
+repos = repo.tizen_base,repo.tizen_mobile
buildroot = /home/GBS-ROOT/
-[obs.tizen]
-url = http://api.tizen.org
-
[repo.tizen_mobile]
url = http://download.tizen.org/snapshots/tizen/unified/latest/repos/standard/packages/
[repo.tizen_base]
url = http://download.tizen.org/snapshots/tizen/base/latest/repos/standard/packages/
-
-[repo.tizen_one]
-url = http://13.125.34.93/archive/tizen/
-
diff --git a/infra/packaging/preset/20220323 b/infra/packaging/preset/20220323
index 421106c35..0eac1064f 100644
--- a/infra/packaging/preset/20220323
+++ b/infra/packaging/preset/20220323
@@ -20,21 +20,26 @@ function preset_configure()
# loco IR and related utilities
REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
# Flatbuffer I/O
- REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-tflite280" "mio-circle04")
+ REQUIRED_UNITS+=("mio-tflite280" "mio-circle04")
# Data I/O
REQUIRED_UNITS+=("dio-hdf5")
# Circle compiler library (.circle -> .circle)
REQUIRED_UNITS+=("luci")
# Tools
- REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef")
REQUIRED_UNITS+=("circle-tensordump" "circledump")
- REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter")
REQUIRED_UNITS+=("luci-eval-driver")
REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
- REQUIRED_UNITS+=("circle-partitioner")
+ REQUIRED_UNITS+=("circle-eval-diff" "circle-interpreter")
+ REQUIRED_UNITS+=("circle-partitioner" "circle-operator")
REQUIRED_UNITS+=("one-cmds")
REQUIRED_UNITS+=("bcq-tools")
+ # Dependent modules needed for build
+ REQUIRED_UNITS+=("circlechef")
+ REQUIRED_UNITS+=("circle-verify")
+
NPROC=${NPROC:-$(cat /proc/cpuinfo | grep -c processor)}
# TODO Use "nncc configure" and "nncc build"
diff --git a/infra/packaging/preset/20220323_windows b/infra/packaging/preset/20220323_windows
index 60500b1e0..14917b3dd 100644
--- a/infra/packaging/preset/20220323_windows
+++ b/infra/packaging/preset/20220323_windows
@@ -15,20 +15,26 @@ function preset_configure()
# loco IR and related utilities
REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
# Flatbuffer I/O
- REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-tflite280" "mio-circle04")
+ REQUIRED_UNITS+=("mio-tflite280" "mio-circle04")
# Data I/O
REQUIRED_UNITS+=("dio-hdf5")
# Circle compiler library (.circle -> .circle)
REQUIRED_UNITS+=("luci")
# Tools
- REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
- REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef")
+ REQUIRED_UNITS+=("circle-tensordump" "circledump")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter")
REQUIRED_UNITS+=("luci-eval-driver")
REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
- REQUIRED_UNITS+=("circle-partitioner")
+ REQUIRED_UNITS+=("circle-eval-diff" "circle-interpreter")
+ REQUIRED_UNITS+=("circle-partitioner" "circle-operator")
REQUIRED_UNITS+=("one-cmds")
REQUIRED_UNITS+=("bcq-tools")
+ # Dependent modules needed for build
+ REQUIRED_UNITS+=("circlechef")
+ REQUIRED_UNITS+=("circle-verify")
+
NPROC=$(cat /proc/cpuinfo | grep -c processor)
# TODO Use "nncc configure" and "nncc build"
diff --git a/infra/packaging/res/tf2nnpkg.20220323 b/infra/packaging/res/tf2nnpkg.20220323
index 0d44818a1..5f43b2386 100644
--- a/infra/packaging/res/tf2nnpkg.20220323
+++ b/infra/packaging/res/tf2nnpkg.20220323
@@ -104,6 +104,6 @@ fi
${ONE_IMPORT_BCQ_SCRIPT}
# optimize
-"${ROOT}/bin/circle2circle" --O1 "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
+"${ROOT}/bin/circle2circle" --resolve_customop_add "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
"${ROOT}/bin/model2nnpkg.sh" -o "${OUTPUT_DIR}" "${TMPDIR}/${MODEL_NAME}.circle"
diff --git a/infra/scripts/compiler_modules.sh b/infra/scripts/compiler_modules.sh
index 6a857d2c8..51cba92f9 100644
--- a/infra/scripts/compiler_modules.sh
+++ b/infra/scripts/compiler_modules.sh
@@ -1,5 +1,8 @@
#!/bin/bash
+# NOTE this file is sourced by other scripts, for the purpose of
+# - configure_compiler_coverage.sh: to get test coverage for release criteria
+
# Don't run this script
[[ "${BASH_SOURCE[0]}" == "${0}" ]] && echo "Please don't execute ${BASH_SOURCE[0]}, source it" && return
@@ -8,13 +11,14 @@ DEBUG_BUILD_ITEMS+=";oops;pepper-assert;pepper-csv2vec"
DEBUG_BUILD_ITEMS+=";hermes;hermes-std"
DEBUG_BUILD_ITEMS+=";loco;locop;locomotiv;logo-core;logo"
DEBUG_BUILD_ITEMS+=";foder;crew;souschef;arser;vconone"
-DEBUG_BUILD_ITEMS+=";safemain;mio-circle04;mio-tflite;mio-tflite260;mio-tflite280"
+DEBUG_BUILD_ITEMS+=";safemain;mio-circle04;mio-tflite280;dio-hdf5"
DEBUG_BUILD_ITEMS+=";tflite2circle"
DEBUG_BUILD_ITEMS+=";luci"
DEBUG_BUILD_ITEMS+=";luci-interpreter"
DEBUG_BUILD_ITEMS+=";luci-eval-driver;luci-pass-value-test;luci-value-test"
DEBUG_BUILD_ITEMS+=";circle2circle;record-minmax;circle-quantizer"
-DEBUG_BUILD_ITEMS+=";circle-partitioner;circle-part-driver"
+DEBUG_BUILD_ITEMS+=";circle-eval-diff"
+DEBUG_BUILD_ITEMS+=";circle-partitioner;circle-part-driver;circle-operator"
DEBUG_BUILD_ITEMS+=";circle-verify"
DEBUG_BUILD_ITEMS+=";circle-tensordump"
DEBUG_BUILD_ITEMS+=";tflchef;circlechef"
@@ -25,3 +29,5 @@ DEBUG_BUILD_ITEMS+=";tf2tfliteV2;tf2tfliteV2-conversion-test"
DEBUG_BUILD_ITEMS+=";tflite2circle-conversion-test"
DEBUG_BUILD_ITEMS+=";pota-quantization-value-test"
DEBUG_BUILD_ITEMS+=";circle-part-value-test"
+DEBUG_BUILD_ITEMS+=";circle-quantizer-dredd-recipe-test"
+DEBUG_BUILD_ITEMS+=";circle-operator-test"
diff --git a/infra/scripts/docker_build_nncc.sh b/infra/scripts/docker_build_nncc.sh
index 7146141bb..2e603b550 100755
--- a/infra/scripts/docker_build_nncc.sh
+++ b/infra/scripts/docker_build_nncc.sh
@@ -27,13 +27,13 @@ else
fi
# prepare tensorflow
-if [ -d $TENSORFLOW_PREFIX ]; then
+if [ -n "$TENSORFLOW_PREFIX" ]; then
DOCKER_OPTS+=" -v $TENSORFLOW_PREFIX:/opt/tensorflow"
CONFIG_OPTIONS+=" -DTENSORFLOW_PREFIX=/opt/tensorflow"
fi
# prepare onnx
-if [ -d $ONNXRUNTIME_PREFIX ]; then
+if [ -n "$ONNXRUNTIME_PREFIX" ]; then
DOCKER_OPTS+=" -v $ONNXRUNTIME_PREFIX:/opt/onnxruntime"
CONFIG_OPTIONS+=" -DONNXRUNTIME_PREFIX=/opt/onnxruntime"
fi
diff --git a/infra/scripts/docker_build_test_x64.sh b/infra/scripts/docker_build_test_x64.sh
index 26d8de4a9..b3428e083 100755
--- a/infra/scripts/docker_build_test_x64.sh
+++ b/infra/scripts/docker_build_test_x64.sh
@@ -32,8 +32,8 @@ pushd $ROOT_PATH > /dev/null
export DOCKER_ENV_VARS
export DOCKER_VOLUMES
export BUILD_OPTIONS
-# Disable nnpackage_run build: mismatch between buildtool for CI and installed hdf5
-CMD="export OPTIONS='-DBUILD_NNPACKAGE_RUN=OFF $BUILD_OPTIONS' && \
+
+CMD="export OPTIONS='$BUILD_OPTIONS' && \
export BUILD_TYPE=Release && \
cp -nv Makefile.template Makefile && \
make all install build_test_suite"
diff --git a/infra/scripts/docker_collect_nnpkg_resources.sh b/infra/scripts/docker_collect_nnpkg_resources.sh
index 06cf8809a..afdd3b9cb 100755
--- a/infra/scripts/docker_collect_nnpkg_resources.sh
+++ b/infra/scripts/docker_collect_nnpkg_resources.sh
@@ -28,13 +28,13 @@ else
fi
# prepare tensorflow
-if [ -d $TENSORFLOW_PREFIX ]; then
+if [ -n "$TENSORFLOW_PREFIX" ]; then
DOCKER_OPTS+=" -v $TENSORFLOW_PREFIX:/opt/tensorflow"
CONFIG_OPTIONS+=" -DTENSORFLOW_PREFIX=/opt/tensorflow"
fi
# prepare onnx
-if [ -d $ONNXRUNTIME_PREFIX ]; then
+if [ -n "$ONNXRUNTIME_PREFIX" ]; then
DOCKER_OPTS+=" -v $ONNXRUNTIME_PREFIX:/opt/onnxruntime"
CONFIG_OPTIONS+=" -DONNXRUNTIME_PREFIX=/opt/onnxruntime"
fi
@@ -71,7 +71,7 @@ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
# Circle compiler library (.circle -> .circle)
REQUIRED_UNITS+=("luci")
# Flatbuffer I/O
-REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-tflite280" "mio-circle04")
+REQUIRED_UNITS+=("mio-tflite280" "mio-circle04")
# Tools
REQUIRED_UNITS+=("tflite2circle" "circle2circle" "luci-interpreter")
REQUIRED_UNITS+=("souschef" "tflchef" "circlechef" "circle-verify")
diff --git a/infra/scripts/test_ubuntu_runtime_mixed.sh b/infra/scripts/test_ubuntu_runtime_mixed.sh
index 697fed897..2510d9c85 100755
--- a/infra/scripts/test_ubuntu_runtime_mixed.sh
+++ b/infra/scripts/test_ubuntu_runtime_mixed.sh
@@ -55,8 +55,8 @@ echo "GeneratedTests.squeeze_relaxed" >> $SKIPLIST_PREFIX.union
# Run the test
export OP_BACKEND_Conv2D="cpu"
-export OP_BACKEND_MaxPool2D="acl_cl"
-export OP_BACKEND_AvgPool2D="acl_neon"
+export OP_BACKEND_Pool2D="acl_cl"
+export OP_BACKEND_FullyConnected="acl_neon"
export ACL_LAYOUT="NCHW"
export RUY_THREADS=4
NNAPIGTest "acl_cl;acl_neon;cpu" "Product/out/unittest/nnapi_gtest.skip.${TEST_ARCH}-${TEST_OS}.union" "report/mixed"
diff --git a/infra/scripts/unittest_compiler_xml.sh b/infra/scripts/unittest_compiler_xml.sh
index 46d3bc813..6e9e8ad7f 100755
--- a/infra/scripts/unittest_compiler_xml.sh
+++ b/infra/scripts/unittest_compiler_xml.sh
@@ -7,7 +7,9 @@ set -eo pipefail
CURRENT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_PATH="$CURRENT_PATH/../../"
NNCC_WORKSPACE=${NNCC_WORKSPACE:-${ROOT_PATH}build}
-UNITTEST_REPORT_DIR=${NNCC_WORKSPACE}/unittest_compiler_xml
+
+# Use fixed absolute report dir for CI
+UNITTEST_REPORT_DIR=${ROOT_PATH}build/unittest_compiler_xml
for i in "$@"
do
@@ -25,5 +27,10 @@ fi
for TEST_BIN in `find ${NNCC_WORKSPACE}/compiler -type f -executable -name *_test`; do
TEST_NAME="$(basename -- $TEST_BIN)"
- LUGI_LOG=999 $TEST_BIN --gtest_output="xml:$UNITTEST_REPORT_DIR/$TEST_NAME.xml"
+ TEST_DIR="$(dirname $TEST_BIN)"
+
+ # Execute on test directory to find related file
+ pushd $TEST_DIR > /dev/null
+ LUGI_LOG=999 ./$TEST_NAME --gtest_output="xml:$UNITTEST_REPORT_DIR/$TEST_NAME.xml"
+ popd > /dev/null
done
diff --git a/nnpackage/examples/README.md b/nnpackage/examples/README.md
index fb0bae35e..951048bec 100644
--- a/nnpackage/examples/README.md
+++ b/nnpackage/examples/README.md
@@ -1,5 +1,12 @@
# NNPackage example
+## Package version 1.3.0
+
+### two_tflites
+
+- Model file: two TensorFlow Lite models
+- It has two tflite models with pkg-input, pkg-output and model-connect fields.
+
## Package version 1.1.0
### one_op_in_tflite
diff --git a/nnpackage/examples/v1.3.0/two_tflites/README.md b/nnpackage/examples/v1.3.0/two_tflites/README.md
new file mode 100644
index 000000000..3fcbe2d90
--- /dev/null
+++ b/nnpackage/examples/v1.3.0/two_tflites/README.md
@@ -0,0 +1,28 @@
+## How to create
+
+```
+$ wget https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224.tgz
+$ tar -zxf mobilenet_v1_1.0_224.tgz
+
+$ python tools/tflitefile_tool/select_operator.py mobilenet_v1_1.0_224.tflite <( echo 0-1 ) mv1.0_1.tflite
+$ python tools/tflitefile_tool/select_operator.py mv1.0_1.tflite <( echo 0 ) mv1.0.tflite
+$ python tools/tflitefile_tool/select_operator.py mv1.0_1.tflite <( echo 1 ) mv1.1.tflite
+
+# make sure three tflite is valid
+$ ./Product/out/bin/tflite_comparator mv1.0_1.tflite
+$ ./Product/out/bin/tflite_comparator mv1.0.tflite
+$ ./Product/out/bin/tflite_comparator mv1.1.tflite
+
+$ tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh -m mv1.0.tflite mv1.1.tflite -p two_tflites
+$ cat two_tflites/metadata/MANIFEST
+{
+ "major-version" : "1",
+ "minor-version" : "2",
+ "patch-version" : "0",
+ "configs" : [ ],
+ "models" : [ "mv1.0.tflite", "mv1.1.tflite" ],
+ "model-types" : [ "tflite", "tflite" ]
+}
+
+# update minor-version, and add additional fields manually
+```
diff --git a/nnpackage/examples/v1.3.0/two_tflites/metadata/MANIFEST b/nnpackage/examples/v1.3.0/two_tflites/metadata/MANIFEST
new file mode 100644
index 000000000..9d9e21ac1
--- /dev/null
+++ b/nnpackage/examples/v1.3.0/two_tflites/metadata/MANIFEST
@@ -0,0 +1,11 @@
+{
+ "major-version" : "1",
+ "minor-version" : "3",
+ "patch-version" : "0",
+ "configs" : [ ],
+ "models" : [ "mv1.0.tflite", "mv1.1.tflite" ],
+ "model-types" : [ "tflite", "tflite" ],
+ "pkg-inputs" : [ "0:0:0" ],
+ "pkg-outputs" : [ "1:0:0" ],
+ "model-connect" : [ { "from" : "0:0:0", "to" : [ "1:0:0" ] } ]
+}
diff --git a/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/expected.h5 b/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/expected.h5
new file mode 100644
index 000000000..59a6b9040
--- /dev/null
+++ b/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/expected.h5
Binary files differ
diff --git a/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/input.h5 b/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/input.h5
new file mode 100644
index 000000000..2251157c7
--- /dev/null
+++ b/nnpackage/examples/v1.3.0/two_tflites/metadata/tc/input.h5
Binary files differ
diff --git a/nnpackage/examples/v1.3.0/two_tflites/mv1.0.tflite b/nnpackage/examples/v1.3.0/two_tflites/mv1.0.tflite
new file mode 100644
index 000000000..03f30c747
--- /dev/null
+++ b/nnpackage/examples/v1.3.0/two_tflites/mv1.0.tflite
Binary files differ
diff --git a/nnpackage/examples/v1.3.0/two_tflites/mv1.1.tflite b/nnpackage/examples/v1.3.0/two_tflites/mv1.1.tflite
new file mode 100644
index 000000000..e3b4f8db7
--- /dev/null
+++ b/nnpackage/examples/v1.3.0/two_tflites/mv1.1.tflite
Binary files differ
diff --git a/nnpackage/schema/circle_schema.fbs b/nnpackage/schema/circle_schema.fbs
index 3972056f9..8ad444d95 100644
--- a/nnpackage/schema/circle_schema.fbs
+++ b/nnpackage/schema/circle_schema.fbs
@@ -1,4 +1,4 @@
-// Copyright (c) 2019~2020 Samsung Electronics Co., Ltd. All Rights Reserved
+// Copyright (c) 2019~2022 Samsung Electronics Co., Ltd. All Rights Reserved
// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
@@ -28,6 +28,7 @@
// `asymmetric_quantize_inputs` for several operator options
// Version 0.2: BCQ_GATHER and BCQ_FULLY_CONNECTED are added.
// Version 0.3: SHUFFLED16x1FLOAT32 is added.
+// Version 0.4: Base up to TensorFlow Lite v2.7.0 schema.
namespace circle;
@@ -52,6 +53,14 @@ enum TensorType : byte {
COMPLEX64 = 8,
INT8 = 9,
FLOAT64 = 10,
+ COMPLEX128 = 11,
+ UINT64 = 12,
+ // Experimental: Resource and variant types are experimental, that are subject
+ // to change. Do not implement custom kernels using resource & variant types
+ // now.
+ RESOURCE = 13,
+ VARIANT = 14,
+ UINT32 = 15,
}
// Custom quantization parameters for experimenting with new quantization
@@ -224,8 +233,11 @@ table Tensor {
// ones, but not by much. Moreover, while custom operators accept an opaque
// object containing configuration parameters, builtins have a predetermined
// set of acceptable options.
-
-enum BuiltinOperator : ubyte {
+// LINT.IfChange
+enum BuiltinOperator : int32 {
+ BCQ_GATHER = -4,
+ BCQ_FULLY_CONNECTED = -3,
+ INSTANCE_NORM = -2,
ADD = 0,
AVERAGE_POOL_2D = 1,
CONCATENATION = 2,
@@ -258,7 +270,6 @@ enum BuiltinOperator : ubyte {
SPACE_TO_DEPTH = 26,
SVDF = 27,
TANH = 28,
- // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
CONCAT_EMBEDDINGS = 29,
SKIP_GRAM = 30,
CALL = 31,
@@ -360,10 +371,28 @@ enum BuiltinOperator : ubyte {
DENSIFY = 124,
SEGMENT_SUM = 125,
BATCH_MATMUL = 126,
- BCQ_GATHER = 252,
- BCQ_FULLY_CONNECTED = 253,
- INSTANCE_NORM = 254,
-}
+ PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+ CUMSUM = 128,
+ CALL_ONCE = 129,
+ BROADCAST_TO = 130,
+ RFFT2D = 131,
+ CONV_3D = 132,
+ IMAG=133,
+ REAL=134,
+ COMPLEX_ABS=135,
+ HASHTABLE = 136,
+ HASHTABLE_FIND = 137,
+ HASHTABLE_IMPORT = 138,
+ HASHTABLE_SIZE = 139,
+ REDUCE_ALL = 140,
+ CONV_3D_TRANSPOSE = 141,
+ VAR_HANDLE = 142,
+ READ_VARIABLE = 143,
+ ASSIGN_VARIABLE = 144,
+ BROADCAST_ARGS = 145,
+ RANDOM_STANDARD_NORMAL = 146,
+}
+// LINT.ThenChange(nnapi_linter/linter.proto)
// Options for the builtin operators.
union BuiltinOptions {
@@ -468,6 +497,19 @@ union BuiltinOptions {
DensifyOptions,
SegmentSumOptions,
BatchMatMulOptions,
+ CumsumOptions,
+ CallOnceOptions,
+ BroadcastToOptions,
+ Rfft2dOptions,
+ Conv3DOptions,
+ HashtableOptions,
+ HashtableFindOptions,
+ HashtableImportOptions,
+ HashtableSizeOptions,
+ VarHandleOptions,
+ ReadVariableOptions,
+ AssignVariableOptions,
+ RandomOptions,
BCQGatherOptions = 252,
BCQFullyConnectedOptions = 253,
InstanceNormOptions = 254,
@@ -493,6 +535,18 @@ table Conv2DOptions {
dilation_h_factor:int = 1;
}
+// Options for both Conv3D and Conv3DTranspose.
+table Conv3DOptions {
+ padding:Padding;
+ stride_d:int;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_d_factor:int = 1;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
table Pool2DOptions {
padding:Padding;
stride_w:int;
@@ -599,6 +653,8 @@ table ConcatenationOptions {
table AddOptions {
fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 3.
+ pot_scale_int16:bool = true;
}
table MulOptions {
@@ -606,6 +662,7 @@ table MulOptions {
}
table L2NormOptions {
+ // This field is currently ignored in the L2 Norm Op.
fused_activation_function:ActivationFunctionType;
}
@@ -679,6 +736,7 @@ table ResizeBilinearOptions {
table ResizeNearestNeighborOptions {
align_corners: bool;
+ half_pixel_centers: bool;
}
// A call operation options
@@ -719,6 +777,8 @@ table DepthToSpaceOptions {
table SubOptions {
fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 5
+ pot_scale_int16:bool = true;
}
table DivOptions {
@@ -740,6 +800,8 @@ table EmbeddingLookupSparseOptions {
table GatherOptions {
axis: int;
+ // Parameters for Gather version 5 or above.
+ batch_dims: int = 0;
}
table TransposeOptions {
@@ -962,6 +1024,10 @@ table IfOptions {
else_subgraph_index:int;
}
+table CallOnceOptions {
+ init_subgraph_index:int;
+}
+
table WhileOptions {
cond_subgraph_index:int;
body_subgraph_index:int;
@@ -988,6 +1054,54 @@ table SegmentSumOptions {
table BatchMatMulOptions {
adjoint_lhs:bool;
adjoint_rhs:bool;
+ // Parameters for BatchMatMul version 4 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table CumsumOptions {
+ exclusive:bool;
+ reverse:bool;
+}
+
+table BroadcastToOptions {
+}
+
+table Rfft2dOptions {
+}
+
+table HashtableOptions {
+ // The identity of hash tables. This identity will be used across different
+ // subgraphs in the same interpreter instance.
+ table_id:int;
+ key_dtype:TensorType;
+ value_dtype:TensorType;
+}
+
+table HashtableFindOptions {
+}
+
+table HashtableImportOptions {
+}
+
+table HashtableSizeOptions {
+}
+
+table VarHandleOptions {
+ container:string;
+ shared_name:string;
+}
+
+table ReadVariableOptions {
+}
+
+table AssignVariableOptions {
+}
+
+table RandomOptions {
+ seed: int;
+ seed2: int;
}
table BCQGatherOptions {
@@ -1008,12 +1122,21 @@ table InstanceNormOptions {
// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
// builtin, or a string if the operator is custom.
table OperatorCode {
- builtin_code:BuiltinOperator;
+ // This field is for backward compatibility. This field will be used when
+ // the value of the extended builtin_code field has less than
+ // BulitinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ deprecated_builtin_code:byte;
custom_code:string;
// The version of the operator. The version need to be bumped whenever new
// parameters are introduced into an op.
version:int = 1;
+
+ // This field is introduced for resolving op builtin code shortage problem
+ // (the original BuiltinOperator enum field was represented as a byte).
+ // This field will be used when the value of the extended builtin_code field
+ // has greater than BulitinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ builtin_code:BuiltinOperator;
}
enum CustomOptionsFormat : byte {
@@ -1104,6 +1227,35 @@ table Metadata {
buffer:uint;
}
+// Map from an alias name of tensor to tensor index in the graph.
+// This is used in Signature def.
+table TensorMap {
+ // Represents the alias to use for this tensor.
+ name:string;
+
+ // The actual tensor index in the primary graph, that 'name' corresponds to.
+ tensor_index:uint;
+}
+
+// This corresponds to SignatureDef in Tensorflow SavedModel.
+// The SignatureDef will be part of the SavedModel provided for conversion.
+table SignatureDef {
+ // Named inputs for this signature.
+ inputs:[TensorMap];
+
+ // Named outputs for this signature.
+ outputs:[TensorMap];
+
+ // Key value which was in the Tensorflow SavedModel SignatureDef map.
+ signature_key:string;
+
+ // Model tag, deprecated.
+ deprecated_tag:string (deprecated);
+
+ // Index of subgraphs that corresponds to the exported method.
+ subgraph_index:uint;
+}
+
table Model {
// Version of the schema.
version:uint;
@@ -1132,6 +1284,9 @@ table Model {
// Metadata about the model.
metadata:[Metadata];
+
+ // Optional SignatureDefs for the model.
+ signature_defs:[SignatureDef];
}
root_type Model;
diff --git a/packaging/ABSEIL.tar.gz b/packaging/ABSEIL.tar.gz
index e05654156..dc7aab548 100644
--- a/packaging/ABSEIL.tar.gz
+++ b/packaging/ABSEIL.tar.gz
Binary files differ
diff --git a/packaging/CPUINFO.tar.gz b/packaging/CPUINFO.tar.gz
index ced5debc3..a74fe355a 100644
--- a/packaging/CPUINFO.tar.gz
+++ b/packaging/CPUINFO.tar.gz
Binary files differ
diff --git a/packaging/FP16.tar.gz b/packaging/FP16.tar.gz
index ebd276435..78c787673 100644
--- a/packaging/FP16.tar.gz
+++ b/packaging/FP16.tar.gz
Binary files differ
diff --git a/packaging/RUY.tar.gz b/packaging/RUY.tar.gz
deleted file mode 100644
index 9ad14fe6c..000000000
--- a/packaging/RUY.tar.gz
+++ /dev/null
Binary files differ
diff --git a/packaging/TENSORFLOW-2.8.0-RUY.tar.gz b/packaging/TENSORFLOW-2.8.0-RUY.tar.gz
new file mode 100644
index 000000000..8e6734718
--- /dev/null
+++ b/packaging/TENSORFLOW-2.8.0-RUY.tar.gz
Binary files differ
diff --git a/packaging/nnfw.spec b/packaging/nnfw.spec
index 324fe1d11..0518541da 100644
--- a/packaging/nnfw.spec
+++ b/packaging/nnfw.spec
@@ -1,6 +1,6 @@
Name: nnfw
Summary: nnfw
-Version: 1.20.0
+Version: 1.21.0
Release: 1
Group: Development
License: Apache-2.0 and MIT and BSD-2-Clause and MPL-2.0
@@ -12,7 +12,7 @@ Source1001: nnapi_test_generated.tar.gz
#Source1002: GTEST.tar.gz
Source1003: TENSORFLOW-2.3.0-EIGEN.tar.gz
Source1004: GEMMLOWP.tar.gz
-Source1005: RUY.tar.gz
+Source1005: TENSORFLOW-2.8.0-RUY.tar.gz
Source1006: CPUINFO.tar.gz
Source1007: XNNPACK.tar.gz
Source1008: FXDIV.tar.gz
@@ -28,21 +28,26 @@ Source2001: nnfw.pc.in
Source2002: nnfw-plugin.pc.in
%{!?build_type: %define build_type Release}
+%{!?npud_build: %define npud_build 1}
%{!?trix_support: %define trix_support 1}
%{!?coverage_build: %define coverage_build 0}
%{!?test_build: %define test_build 0}
%{!?extra_option: %define extra_option %{nil}}
+%{!?config_support: %define config_support 1}
+
%if %{coverage_build} == 1
+# Coverage test requires debug build runtime
+%define build_type Debug
%define test_build 1
%endif
BuildRequires: cmake
# Require flatbuffers-devel for onert frontend (model loading)
-BuildRequires: flatbuffers-devel
+BuildRequires: pkgconfig(flatbuffers)
%ifarch %{arm} aarch64
# Require python for acl-ex library build pre-process
-BuildRequires: python
+BuildRequires: python3
BuildRequires: libarmcl-devel >= v21.02
%endif
@@ -50,17 +55,21 @@ Requires(post): /sbin/ldconfig
Requires(postun): /sbin/ldconfig
%if %{test_build} == 1
-BuildRequires: boost-devel
-BuildRequires: tensorflow-lite-devel
+BuildRequires: pkgconfig(boost)
+BuildRequires: pkgconfig(tensorflow-lite)
BuildRequires: hdf5-devel
BuildRequires: libaec-devel
-BuildRequires: zlib-devel
-BuildRequires: libjpeg-devel
+BuildRequires: pkgconfig(zlib)
+BuildRequires: pkgconfig(libjpeg)
BuildRequires: gtest-devel
%endif
+%if %{npud_build} == 1
+BuildRequires: pkgconfig(glib-2.0)
+%endif
+
%if %{trix_support} == 1
-BuildRequires: npu-engine-devel
+BuildRequires: pkgconfig(npu-engine)
%endif
%description
@@ -91,7 +100,18 @@ Minimal test binary for VD manual test
Summary: NNFW Test
%description test
-NNFW test rpm. It does not depends on nnfw rpm since it contains nnfw runtime.
+NNFW test rpm.
+If you want to use test package, you should install runtime package which is build with test build option
+If you want to get coverage info, you should install runtime package which is build with coverage build option
+# TODO Use release runtime pacakge for test
+%endif
+
+%if %{npud_build} == 1
+%package npud
+Summary: NPU daemon
+
+%description npud
+NPU daemon for optimal management of NPU hardware
%endif
%ifarch armv7l
@@ -112,22 +132,40 @@ NNFW test rpm. It does not depends on nnfw rpm since it contains nnfw runtime.
%define install_dir %{_prefix}
%define install_path %{buildroot}%{install_dir}
-%define build_env NNFW_WORKSPACE=build
-%define build_options -DCMAKE_BUILD_TYPE=%{build_type} -DTARGET_ARCH=%{target_arch} -DTARGET_OS=tizen -DENABLE_TEST=off -DBUILD_MINIMAL_SAMPLE=on
+%define nnfw_workspace build
+%define build_env NNFW_WORKSPACE=%{nnfw_workspace}
-# Set option for test build (and coverage test build)
+# Path to install test bin and scripts (test script assumes path Product/out)
+# TODO Share path with release package
%define test_install_home /opt/usr/nnfw-test
%define test_install_dir %{test_install_home}/Product/out
%define test_install_path %{buildroot}%{test_install_dir}
-%define coverage_option %{nil}
+
+# Set option for test build (and coverage test build)
+%define option_test -DENABLE_TEST=OFF
+%define option_coverage %{nil}
%define test_suite_list infra/scripts tests/scripts
-%define test_build_type %{build_type}
+
+%if %{test_build} == 1
+# ENVVAR_ONERT_CONFIG: Use environment variable for runtime core configuration and debug
+%define option_test -DENABLE_TEST=ON -DENVVAR_ONERT_CONFIG=ON
+%endif # test_build
+
+# Set option for configuration
+%define option_config %{nil}
+%if %{config_support} == 1
+%if %{npud_build} == 1
+# ENVVAR_NPUD_CONFIG: Use environment variable for npud configuration and debug
+%define option_config -DENVVAR_NPUD_CONFIG=ON
+%endif # npud_build
+%endif # config_support
+
%if %{coverage_build} == 1
-%define coverage_option -DENABLE_COVERAGE=ON
-%define test_build_type Debug
-%endif
-%define test_build_env NNFW_INSTALL_PREFIX=%{test_install_path} NNFW_WORKSPACE=build_for_test
-%define test_build_options %{coverage_option} -DCMAKE_BUILD_TYPE=%{test_build_type} -DTARGET_ARCH=%{target_arch} -DTARGET_OS=tizen -DENVVAR_ONERT_CONFIG=ON
+%define option_coverage -DENABLE_COVERAGE=ON
+%endif # coverage_build
+
+%define build_options -DCMAKE_BUILD_TYPE=%{build_type} -DTARGET_ARCH=%{target_arch} -DTARGET_OS=tizen -DBUILD_MINIMAL_SAMPLE=ON \\\
+ %{option_test} %{option_coverage} %{option_config} %{extra_option}
%prep
%setup -q
@@ -153,17 +191,13 @@ tar -xf %{SOURCE1016} -C ./externals
%build
%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86
# runtime build
-%{build_env} ./nnfw configure %{build_options} %{extra_option}
+%{build_env} ./nnfw configure %{build_options}
%{build_env} ./nnfw build -j4
# install in workspace
# TODO Set install path
%{build_env} ./nnfw install
%if %{test_build} == 1
-# test runtime
-# TODO remove duplicated build process
-%{test_build_env} ./nnfw configure %{test_build_options} %{extra_option}
-%{test_build_env} ./nnfw build -j4
%if %{coverage_build} == 1
pwd > tests/scripts/build_path.txt
%endif # coverage_build
@@ -195,19 +229,37 @@ install -m 0644 ./nnfw.pc.in %{buildroot}%{_libdir}/pkgconfig/nnfw.pc
install -m 0644 ./nnfw-plugin.pc.in %{buildroot}%{_libdir}/pkgconfig/nnfw-plugin.pc
%if %{test_build} == 1
-%{test_build_env} ./nnfw install
+mkdir -p %{test_install_path}/bin
+mkdir -p %{test_install_path}/unittest
+mkdir -p %{test_install_path}/unittest_standalone
+mkdir -p %{test_install_path}/test
+
+install -m 755 build/out/bin/nnapi_test %{test_install_path}/bin
+install -m 755 build/out/bin/nnpackage_run %{test_install_path}/bin
+install -m 755 build/out/bin/tflite_comparator %{test_install_path}/bin
+install -m 755 build/out/bin/tflite_run %{test_install_path}/bin
+install -m 755 build/out/unittest/* %{test_install_path}/unittest
+install -m 755 build/out/unittest_standalone/*_test %{test_install_path}/unittest_standalone
+install -m 755 build/out/unittest_standalone/test_* %{test_install_path}/unittest_standalone
+cp -r build/out/test/* %{test_install_path}/test
+cp -r build/out/unittest_standalone/nnfw_api_gtest_models %{test_install_path}/unittest_standalone
+
# Share test script with ubuntu (ignore error if there is no list for target)
-cp tests/nnapi/nnapi_gtest.skip.%{target_arch}-* %{buildroot}%{test_install_dir}/unittest/.
-cp %{buildroot}%{test_install_dir}/unittest/nnapi_gtest.skip.%{target_arch}-linux.cpu %{buildroot}%{test_install_dir}/unittest/nnapi_gtest.skip
+cp tests/nnapi/nnapi_gtest.skip.%{target_arch}-* %{test_install_path}/unittest/.
+cp %{test_install_path}/unittest/nnapi_gtest.skip.%{target_arch}-linux.cpu %{test_install_path}/unittest/nnapi_gtest.skip
tar -zxf test-suite.tar.gz -C %{buildroot}%{test_install_home}
%if %{coverage_build} == 1
mkdir -p %{buildroot}%{test_install_home}/gcov
-find . -name "*.gcno" -exec xargs cp {} %{buildroot}%{test_install_home}/gcov/. \;
+find %{nnfw_workspace} -name "*.gcno" -exec xargs cp {} %{buildroot}%{test_install_home}/gcov/. \;
install -m 0644 ./tests/scripts/build_path.txt %{buildroot}%{test_install_dir}/test/build_path.txt
%endif # coverage_build
%endif # test_build
+%if %{npud_build} == 1
+install -m 755 build/out/bin/npud %{buildroot}%{_bindir}
+%endif
+
%endif
%post -p /sbin/ldconfig
@@ -256,6 +308,15 @@ install -m 0644 ./tests/scripts/build_path.txt %{buildroot}%{test_install_dir}/t
%endif # arm armv7l armv7hl aarch64
%endif # test_build
+%if %{npud_build} == 1
+%files npud
+%manifest %{name}.manifest
+%defattr(-,root,root,-)
+%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86
+%{_bindir}/npud
+%endif # arm armv7l armv7hl aarch64 x86_64 %ix86
+%endif # npud_build
+
%changelog
* Thu Mar 15 2018 Chunseok Lee <chunseok.lee@samsung.com>
- Initial spec file for nnfw
diff --git a/res/CircleRecipes/Quant_InstanceNorm_000/test.qconf.json b/res/CircleRecipes/Quant_InstanceNorm_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/CircleRecipes/Quant_InstanceNorm_000/test.recipe b/res/CircleRecipes/Quant_InstanceNorm_000/test.recipe
new file mode 100644
index 000000000..b9c2ab8c9
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_000/test.recipe
@@ -0,0 +1,43 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 6 dim: 12 }
+}
+operand {
+ name: "gamma"
+ type: FLOAT32
+ shape { dim: 12 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "beta"
+ type: FLOAT32
+ shape { dim: 12 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 6 dim: 12 }
+}
+operation {
+ type: "InstanceNorm"
+ input: "ifm"
+ input: "gamma"
+ input: "beta"
+ output: "ofm"
+ instance_norm_options {
+ epsilon: 0.00001
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/CircleRecipes/Quant_InstanceNorm_000/test.reverse b/res/CircleRecipes/Quant_InstanceNorm_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_000/test.reverse
diff --git a/res/CircleRecipes/Quant_InstanceNorm_000/test.rule b/res/CircleRecipes/Quant_InstanceNorm_000/test.rule
new file mode 100644
index 000000000..a17692d05
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_000/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "GAMMA_S16" $(tensor_dtype gamma) '=' INT16
+RULE "BETA_S16" $(tensor_dtype beta) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/CircleRecipes/Quant_InstanceNorm_001/test.qconf.json b/res/CircleRecipes/Quant_InstanceNorm_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/CircleRecipes/Quant_InstanceNorm_001/test.recipe b/res/CircleRecipes/Quant_InstanceNorm_001/test.recipe
new file mode 100644
index 000000000..b9c2ab8c9
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_001/test.recipe
@@ -0,0 +1,43 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 6 dim: 12 }
+}
+operand {
+ name: "gamma"
+ type: FLOAT32
+ shape { dim: 12 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "beta"
+ type: FLOAT32
+ shape { dim: 12 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 6 dim: 12 }
+}
+operation {
+ type: "InstanceNorm"
+ input: "ifm"
+ input: "gamma"
+ input: "beta"
+ output: "ofm"
+ instance_norm_options {
+ epsilon: 0.00001
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/CircleRecipes/Quant_InstanceNorm_001/test.reverse b/res/CircleRecipes/Quant_InstanceNorm_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_001/test.reverse
diff --git a/res/CircleRecipes/Quant_InstanceNorm_001/test.rule b/res/CircleRecipes/Quant_InstanceNorm_001/test.rule
new file mode 100644
index 000000000..e62dd4839
--- /dev/null
+++ b/res/CircleRecipes/Quant_InstanceNorm_001/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "GAMMA_U8" $(tensor_dtype gamma) '=' UINT8
+RULE "BETA_U8" $(tensor_dtype beta) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/ArgMax_004/test.recipe b/res/TensorFlowLiteRecipes/ArgMax_004/test.recipe
new file mode 100644
index 000000000..b31e16043
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ArgMax_004/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: INT64
+ shape { dim: 1 dim: 4 }
+}
+operand {
+ name: "argmax/dim"
+ type: INT32
+ shape { }
+ filler {
+ tag: "explicit"
+ arg: "-1"
+ }
+}
+operation {
+ type: "ArgMax"
+ argmax_options {
+ output_type: INT64
+ }
+ input: "ifm"
+ input: "argmax/dim"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/ArgMax_004/test.reverse b/res/TensorFlowLiteRecipes/ArgMax_004/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/ArgMax_004/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Densify_000/test.recipe b/res/TensorFlowLiteRecipes/Densify_000/test.recipe
new file mode 100644
index 000000000..480c52f15
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Densify_000/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "sparse"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "2" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "3" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "4"
+ }
+ make_sparse: true
+}
+operand {
+ name: "dense"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operation {
+ type: "Densify"
+ input: "sparse"
+ output: "dense"
+}
+operation {
+ type: "Add"
+ input: "in"
+ input: "dense"
+ output: "out"
+ add_options {
+ activation: NONE
+ }
+}
+input: "in"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_007/test.recipe b/res/TensorFlowLiteRecipes/FullyConnected_007/test.recipe
new file mode 100644
index 000000000..572badfbb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_007/test.recipe
@@ -0,0 +1,29 @@
+operand {
+ name: "x"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operand {
+ name: "y"
+ type: FLOAT32
+ shape { dim: 2 dim: 4 }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 2 dim: 2 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ keep_num_dims: true
+ }
+ input: "x"
+ input: "y"
+ input: ""
+ output: "out"
+}
+input: "x"
+input: "y"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_007/test.reverse b/res/TensorFlowLiteRecipes/FullyConnected_007/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_007/test.reverse
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_007/test.rule b/res/TensorFlowLiteRecipes/FullyConnected_007/test.rule
new file mode 100644
index 000000000..01518e575
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/FullyConnected_007/test.rule
@@ -0,0 +1,7 @@
+# To check if FullyConnected with non-const weight is replaced by MatMul
+# with replace_non_const_fc_with_batch_matmul pass
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "BATCH_MATMUL_EXIST" $(op_count BATCH_MATMUL) '=' 1
+RULE "NO_FULLY_CONNECTED" $(op_count FULLY_CONNECTED) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_Densify_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Densify_Add_000/test.recipe
new file mode 100644
index 000000000..ea604b20f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Densify_Add_000/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "sparse"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "2" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "3" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "4"
+ }
+ make_sparse: true
+}
+operand {
+ name: "dense"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operation {
+ type: "Densify"
+ input: "sparse"
+ output: "dense"
+}
+operation {
+ type: "Add"
+ input: "ifm"
+ input: "dense"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_Densify_Dequantize_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Densify_Dequantize_Add_000/test.recipe
new file mode 100644
index 000000000..6e1083fae
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Densify_Dequantize_Add_000/test.recipe
@@ -0,0 +1,54 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "sparse16"
+ type: FLOAT16
+ shape { dim: 4 dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "0" arg: "0" arg: "0"
+ arg: "0" arg: "2" arg: "0" arg: "0"
+ arg: "0" arg: "0" arg: "3" arg: "0"
+ arg: "0" arg: "0" arg: "0" arg: "4"
+ }
+ make_sparse: true
+}
+operand {
+ name: "dense16"
+ type: FLOAT16
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "dense32"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 4 dim: 4 }
+}
+operation {
+ type: "Densify"
+ input: "sparse16"
+ output: "dense16"
+}
+operation {
+ type: "Dequantize"
+ input: "dense16"
+ output: "dense32"
+}
+operation {
+ type: "Add"
+ input: "ifm"
+ input: "dense32"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_Dequantize_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Dequantize_Add_000/test.recipe
new file mode 100644
index 000000000..5f212a7a6
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_Dequantize_Add_000/test.recipe
@@ -0,0 +1,41 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "float16"
+ type: FLOAT16
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "dequantized"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Dequantize"
+ input: "float16"
+ output: "dequantized"
+}
+operation {
+ type: "Add"
+ input: "ifm"
+ input: "dequantized"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_Add_000/test.recipe
index b3247f24f..afb9a9c4d 100644
--- a/res/TensorFlowLiteRecipes/Net_TConv_Add_000/test.recipe
+++ b/res/TensorFlowLiteRecipes/Net_TConv_Add_000/test.recipe
@@ -12,9 +12,6 @@ operand {
arg: "0.0"
arg: "0.1"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Addition"
@@ -25,9 +22,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Addition_add_param"
@@ -39,9 +33,6 @@ operand {
tag: "explicit"
arg: "-2.04724"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Hole"
@@ -52,11 +43,6 @@ operand {
dim: 2
dim: 2
}
- quant {
- min: 0
- max: 255
- quantized_dimension: 0
- }
}
operand {
name: "conv2d_transpose"
@@ -67,9 +53,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "input_size"
@@ -84,9 +67,6 @@ operand {
arg: "4"
arg: "1"
}
- quant {
- quantized_dimension: 0
- }
}
operation {
type: "TransposeConv"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_Add_001/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_Add_001/test.recipe
index 89a344f0e..b1c9784b0 100644
--- a/res/TensorFlowLiteRecipes/Net_TConv_Add_001/test.recipe
+++ b/res/TensorFlowLiteRecipes/Net_TConv_Add_001/test.recipe
@@ -12,9 +12,6 @@ operand {
arg: "0.0"
arg: "0.1"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Addition"
@@ -25,9 +22,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Addition_add_param"
@@ -45,9 +39,6 @@ operand {
arg: "1" arg: "2" arg: "3" arg: "4"
arg: "-1" arg: "-2" arg: "-3" arg: "-4"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Hole"
@@ -58,11 +49,6 @@ operand {
dim: 2
dim: 2
}
- quant {
- min: 0
- max: 255
- quantized_dimension: 0
- }
}
operand {
name: "conv2d_transpose"
@@ -73,9 +59,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "input_size"
@@ -90,9 +73,6 @@ operand {
arg: "4"
arg: "1"
}
- quant {
- quantized_dimension: 0
- }
}
operation {
type: "TransposeConv"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_Add_002/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_Add_002/test.recipe
index cfea30653..426551485 100644
--- a/res/TensorFlowLiteRecipes/Net_TConv_Add_002/test.recipe
+++ b/res/TensorFlowLiteRecipes/Net_TConv_Add_002/test.recipe
@@ -12,9 +12,6 @@ operand {
arg: "0.0"
arg: "0.1"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Addition"
@@ -25,9 +22,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Addition_add_param"
@@ -39,9 +33,6 @@ operand {
tag: "explicit"
arg: "-2.04724"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Hole"
@@ -52,11 +43,6 @@ operand {
dim: 2
dim: 2
}
- quant {
- min: 0
- max: 255
- quantized_dimension: 0
- }
}
operand {
name: "conv2d_transpose"
@@ -67,9 +53,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "input_size"
@@ -84,9 +67,6 @@ operand {
arg: "4"
arg: "1"
}
- quant {
- quantized_dimension: 0
- }
}
operation {
type: "TransposeConv"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.recipe
index 65248f23b..ef329e1ac 100644
--- a/res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.recipe
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_000/test.recipe
@@ -12,9 +12,6 @@ operand {
arg: "0.0"
arg: "0.1"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "FusedBatchNormV3"
@@ -25,9 +22,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "FusedBatchNormV3_add_param"
@@ -39,9 +33,6 @@ operand {
tag: "explicit"
arg: "-2.04724"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "FusedBatchNormV3_mul_0"
@@ -52,9 +43,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "FusedBatchNormV3_mul_0_param"
@@ -66,9 +54,6 @@ operand {
tag: "explicit"
arg: "2.00834"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Hole"
@@ -79,11 +64,6 @@ operand {
dim: 2
dim: 1
}
- quant {
- min: 0
- max: 255
- quantized_dimension: 0
- }
}
operand {
name: "conv2d_transpose"
@@ -94,9 +74,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "conv2d_transpose/input_sizes"
@@ -111,9 +88,6 @@ operand {
arg: "4"
arg: "1"
}
- quant {
- quantized_dimension: 0
- }
}
operation {
type: "TransposeConv"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_001/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_BN_001/test.recipe
index babf5af4e..1b329bafc 100644
--- a/res/TensorFlowLiteRecipes/Net_TConv_BN_001/test.recipe
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_001/test.recipe
@@ -12,9 +12,6 @@ operand {
arg: "0.0"
arg: "0.1"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "FusedBatchNormV3"
@@ -25,9 +22,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "FusedBatchNormV3_add_param"
@@ -39,9 +33,6 @@ operand {
tag: "explicit"
arg: "-2.04724"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "FusedBatchNormV3_mul_0"
@@ -52,9 +43,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "FusedBatchNormV3_mul_0_param"
@@ -66,9 +54,6 @@ operand {
tag: "explicit"
arg: "2.00834"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Hole"
@@ -79,11 +64,6 @@ operand {
dim: 2
dim: 2
}
- quant {
- min: 0
- max: 255
- quantized_dimension: 0
- }
}
operand {
name: "conv2d_transpose"
@@ -94,9 +74,6 @@ operand {
dim: 4
dim: 1
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "conv2d_transpose/input_sizes"
@@ -111,9 +88,6 @@ operand {
arg: "4"
arg: "1"
}
- quant {
- quantized_dimension: 0
- }
}
operation {
type: "TransposeConv"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_002/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_BN_002/test.recipe
index e40fe4f59..a8af8e497 100644
--- a/res/TensorFlowLiteRecipes/Net_TConv_BN_002/test.recipe
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_002/test.recipe
@@ -8,10 +8,6 @@ operand {
dim: 1
dim: 2
}
- quant {
- quantized_dimension: 0
- }
- is_variable: false
}
operand {
name: "conv2d_transpose/input_sizes"
@@ -26,10 +22,6 @@ operand {
arg: "1"
arg: "2"
}
- quant {
- quantized_dimension: 0
- }
- is_variable: false
}
operand {
name: "FusedBatchNormV3"
@@ -42,10 +34,6 @@ operand {
arg: "-2.04724"
arg: "-7.80109"
}
- quant {
- quantized_dimension: 0
- }
- is_variable: false
}
operand {
name: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes"
@@ -61,10 +49,6 @@ operand {
arg: "0.0"
arg: "0.1"
}
- quant {
- quantized_dimension: 0
- }
- is_variable: false
}
operand {
name: "FusedBatchNormV3;conv2d_transpose;conv2d_transpose/input_sizes2"
@@ -75,10 +59,6 @@ operand {
dim: 1
dim: 2
}
- quant {
- quantized_dimension: 0
- }
- is_variable: false
}
operand {
name: "FusedBatchNormV3_mul_0"
@@ -89,9 +69,6 @@ operand {
dim: 1
dim: 2
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "FusedBatchNormV3_mul_0_param"
@@ -104,9 +81,6 @@ operand {
arg: "2.00834"
arg: "1.00344"
}
- quant {
- quantized_dimension: 0
- }
}
operand {
name: "Relu6"
@@ -117,10 +91,6 @@ operand {
dim: 1
dim: 2
}
- quant {
- quantized_dimension: 0
- }
- is_variable: false
}
operation {
type: "TransposeConv"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.recipe
new file mode 100644
index 000000000..c28e50880
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.recipe
@@ -0,0 +1,135 @@
+operand {
+ name: "Const_transposed"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 3
+ dim: 3
+ dim: 2
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "Output"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+}
+operand {
+ name: "FusedBatchNormV3_add_param"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "-2.04724"
+ }
+}
+operand {
+ name: "FusedBatchNormV3_mul_0"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+}
+operand {
+ name: "FusedBatchNormV3_mul_0_param"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "2.00834"
+ }
+}
+operand {
+ name: "Input"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 2
+ dim: 2
+ dim: 2
+ }
+}
+operand {
+ name: "conv2d_transpose"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 1
+ }
+}
+operand {
+ name: "conv2d_transpose/input_sizes"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "4"
+ arg: "4"
+ arg: "1"
+ }
+}
+operand {
+ name: "conv2d_transpose/bias"
+ type: FLOAT32
+ shape {
+ dim: 1
+ }
+ filler {
+ tag: "explicit"
+ arg: "1.03"
+ }
+}
+operation {
+ type: "TransposeConv"
+ input: "conv2d_transpose/input_sizes"
+ input: "Const_transposed"
+ input: "Input"
+ input: "conv2d_transpose/bias"
+ output: "conv2d_transpose"
+ transpose_conv_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+}
+operation {
+ type: "Mul"
+ input: "conv2d_transpose"
+ input: "FusedBatchNormV3_mul_0_param"
+ output: "FusedBatchNormV3_mul_0"
+ mul_options {
+ activation: NONE
+ }
+}
+operation {
+ type: "Add"
+ input: "FusedBatchNormV3_mul_0"
+ input: "FusedBatchNormV3_add_param"
+ output: "Output"
+ add_options {
+ activation: NONE
+ }
+}
+input: "Input"
+output: "Output"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.rule b/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.rule
new file mode 100644
index 000000000..0988ecf28
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_003/test.rule
@@ -0,0 +1,7 @@
+# To check if BatchNorm op(mul + add) is fused to Transposed Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "TCONV_EXIST" $(op_count TRANSPOSE_CONV) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.recipe b/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.recipe
new file mode 100644
index 000000000..b75527a98
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.recipe
@@ -0,0 +1,149 @@
+operand {
+ name: "conv2d_transpose/input_sizes"
+ type: INT32
+ shape {
+ dim: 4
+ }
+ filler {
+ tag: "explicit"
+ arg: "1"
+ arg: "4"
+ arg: "4"
+ arg: "16"
+ }
+}
+operand {
+ name: "Const_transposed"
+ type: FLOAT32
+ shape {
+ dim: 16
+ dim: 3
+ dim: 3
+ dim: 2
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "Input"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 2
+ dim: 2
+ dim: 2
+ }
+}
+operand {
+ name: "conv2d_transpose/bias"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 16
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "conv2d_transpose"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 16
+ }
+}
+operation {
+ type: "TransposeConv"
+ input: "conv2d_transpose/input_sizes"
+ input: "Const_transposed"
+ input: "Input"
+ input: "conv2d_transpose/bias"
+ output: "conv2d_transpose"
+ transpose_conv_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+}
+
+operand {
+ name: "FusedBatchNormV3_mul_0"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 16
+ }
+}
+operand {
+ name: "FusedBatchNormV3_mul_0_param"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 16
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operation {
+ type: "Mul"
+ input: "conv2d_transpose"
+ input: "FusedBatchNormV3_mul_0_param"
+ output: "FusedBatchNormV3_mul_0"
+ mul_options {
+ activation: NONE
+ }
+}
+
+operand {
+ name: "FusedBatchNormV3_add_param"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 1
+ dim: 1
+ dim: 16
+ }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "0.1"
+ }
+}
+operand {
+ name: "Output"
+ type: FLOAT32
+ shape {
+ dim: 1
+ dim: 4
+ dim: 4
+ dim: 16
+ }
+}
+operation {
+ type: "Add"
+ input: "FusedBatchNormV3_mul_0"
+ input: "FusedBatchNormV3_add_param"
+ output: "Output"
+ add_options {
+ activation: NONE
+ }
+}
+input: "Input"
+output: "Output"
diff --git a/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.rule b/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.rule
new file mode 100644
index 000000000..0988ecf28
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Net_TConv_BN_004/test.rule
@@ -0,0 +1,7 @@
+# To check if BatchNorm op(mul + add) is fused to Transposed Convolution op
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "TCONV_EXIST" $(op_count TRANSPOSE_CONV) '=' 1
+RULE "NO_MUL" $(op_count MUL) '=' 0
+RULE "NO_ADD" $(op_count ADD) '=' 0
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Add_001/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Add_001/test.recipe
new file mode 100644
index 000000000..0ae4862d1
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_001/test.recipe
@@ -0,0 +1,31 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Add"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm1"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Add_001/test.rule
new file mode 100644
index 000000000..b51f4ebbb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_001/test.rule
@@ -0,0 +1,12 @@
+# To check mixed quantization.
+# Default dtype: U8, Add dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_U8" $(tensor_dtype ifm1) '=' UINT8
+RULE "IFM1_QUANTIZE_S16" $(tensor_dtype ifm1_Quantize) '=' INT16
+RULE "IFM2_S16" $(tensor_dtype ifm2) '=' INT16
+RULE "ADD_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_002/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Add_002/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_002/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_002/test.recipe b/res/TensorFlowLiteRecipes/Quant_Add_002/test.recipe
new file mode 100644
index 000000000..0ae4862d1
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_002/test.recipe
@@ -0,0 +1,31 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Add"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ add_options {
+ activation: NONE
+ }
+}
+input: "ifm1"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_002/test.rule b/res/TensorFlowLiteRecipes/Quant_Add_002/test.rule
new file mode 100644
index 000000000..96a2535ef
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Add_002/test.rule
@@ -0,0 +1,12 @@
+# To check mixed quantization.
+# Default dtype: S16, Add dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_S16" $(tensor_dtype ifm1) '=' INT16
+RULE "IFM1_QUANTIZE_U8" $(tensor_dtype ifm1_Quantize) '=' UINT8
+RULE "IFM2_U8" $(tensor_dtype ifm2) '=' UINT8
+RULE "ADD_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.recipe
new file mode 100644
index 000000000..746c34334
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.recipe
@@ -0,0 +1,24 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 7 dim: 7 dim: 1 }
+}
+operation {
+ type: "AveragePool2D"
+ averagepool2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ filter_width: 2
+ filter_height: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.rule b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.recipe
new file mode 100644
index 000000000..746c34334
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.recipe
@@ -0,0 +1,24 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 7 dim: 7 dim: 1 }
+}
+operation {
+ type: "AveragePool2D"
+ averagepool2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ filter_width: 2
+ filter_height: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.rule b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_AveragePool2D_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.recipe
new file mode 100644
index 000000000..2f2e91a9e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.recipe
@@ -0,0 +1,28 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 3 dim: 4 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 4 }
+}
+operation {
+ type: "BatchMatMul"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ batch_matmul_options {
+ adj_x: false
+ adj_y: false
+ }
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.rule b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.rule
new file mode 100644
index 000000000..e832ac526
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_000/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_U8" $(tensor_dtype ifm1) '=' UINT8
+RULE "IFM1_QUANTIZE_S16" $(tensor_dtype ifm1_Quantize) '=' INT16
+RULE "IFM2_U8" $(tensor_dtype ifm2) '=' UINT8
+RULE "IFM2_QUANTIZE_S16" $(tensor_dtype ifm2_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.recipe
new file mode 100644
index 000000000..2f2e91a9e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.recipe
@@ -0,0 +1,28 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 3 dim: 4 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 4 }
+}
+operation {
+ type: "BatchMatMul"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ batch_matmul_options {
+ adj_x: false
+ adj_y: false
+ }
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.rule b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.rule
new file mode 100644
index 000000000..248337716
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_BatchMatMul_001/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_S16" $(tensor_dtype ifm1) '=' INT16
+RULE "IFM1_QUANTIZE_U8" $(tensor_dtype ifm1_Quantize) '=' UINT8
+RULE "IFM2_S16" $(tensor_dtype ifm2) '=' INT16
+RULE "IFM2_QUANTIZE_U8" $(tensor_dtype ifm2_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.recipe
new file mode 100644
index 000000000..35641bd07
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.recipe
@@ -0,0 +1,28 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 1 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Concatenation"
+ concatenation_options {
+ axis: 3
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.rule
new file mode 100644
index 000000000..e832ac526
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_000/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_U8" $(tensor_dtype ifm1) '=' UINT8
+RULE "IFM1_QUANTIZE_S16" $(tensor_dtype ifm1_Quantize) '=' INT16
+RULE "IFM2_U8" $(tensor_dtype ifm2) '=' UINT8
+RULE "IFM2_QUANTIZE_S16" $(tensor_dtype ifm2_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.recipe
new file mode 100644
index 000000000..35641bd07
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.recipe
@@ -0,0 +1,28 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 1 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Concatenation"
+ concatenation_options {
+ axis: 3
+ activation: NONE
+ }
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.rule
new file mode 100644
index 000000000..248337716
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Concatenation_001/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_S16" $(tensor_dtype ifm1) '=' INT16
+RULE "IFM1_QUANTIZE_U8" $(tensor_dtype ifm1_Quantize) '=' UINT8
+RULE "IFM2_S16" $(tensor_dtype ifm2) '=' INT16
+RULE "IFM2_QUANTIZE_U8" $(tensor_dtype ifm2_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_000/test.recipe
new file mode 100644
index 000000000..8a9328be1
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_000/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm"
+ input: "filter"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_000/test.rule
new file mode 100644
index 000000000..f7af083da
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_000/test.rule
@@ -0,0 +1,10 @@
+# To check float32 input.
+# Input is float32, Conv is uint8. Quantize Op is inserted at the beginning.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "INPUT_FLOAT32" $(tensor_dtype ifm) '=' FLOAT32
+RULE "CONV_UINT8" $(tensor_dtype ofm) '=' UINT8
+RULE "WEIGHTS_UINT8" $(tensor_dtype filter) '=' UINT8
+RULE "BIAS_INT32" $(tensor_dtype bias) '=' INT32
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_001/test.recipe
new file mode 100644
index 000000000..8a9328be1
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_001/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm"
+ input: "filter"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_001/test.rule
new file mode 100644
index 000000000..a3f52f26d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_001/test.rule
@@ -0,0 +1,11 @@
+# To check float32 output.
+# Output is float32, Conv is uint8. Dequantize Op is inserted at the end.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+# Update tensor name (ofm_Dequantize) if 'create_dequantize' function is changed.
+RULE "OUTPUT_FLOAT32" $(tensor_dtype ofm_Dequantize) '=' FLOAT32
+RULE "CONV_UINT8" $(tensor_dtype ofm) '=' UINT8
+RULE "WEIGHTS_UINT8" $(tensor_dtype filter) '=' UINT8
+RULE "BIAS_INT32" $(tensor_dtype bias) '=' INT32
+RULE "DEQUANTIZE_OP" $(op_count DEQUANTIZE) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_002/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_002/test.recipe
new file mode 100644
index 000000000..8a9328be1
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_002/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+ name: "filter"
+ type: FLOAT32
+ shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 64 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 2
+ stride_h: 2
+ }
+ input: "ifm"
+ input: "filter"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_002/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_002/test.rule
new file mode 100644
index 000000000..2187895f8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_002/test.rule
@@ -0,0 +1,13 @@
+# To check float32 input/output.
+# Input/Output is float32, Conv is uint8.
+# Quantize Op is inserted at the beginning, Dequantize Op is inserted at the end.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "INPUT_FLOAT32" $(tensor_dtype ifm) '=' FLOAT32
+RULE "OUTPUT_FLOAT32" $(tensor_dtype ofm_Dequantize) '=' FLOAT32
+RULE "CONV_UINT8" $(tensor_dtype ofm) '=' UINT8
+RULE "WEIGHTS_UINT8" $(tensor_dtype filter) '=' UINT8
+RULE "BIAS_INT32" $(tensor_dtype bias) '=' INT32
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 1
+RULE "DEQUANTIZE_OP" $(op_count DEQUANTIZE) '=' 1
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_003/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_003/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.recipe
new file mode 100644
index 000000000..9cf8a0f69
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_003/test.reverse b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_003/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.rule
new file mode 100644
index 000000000..50f235a55
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_003/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "KER_S16" $(tensor_dtype ker) '=' INT16
+RULE "BIAS_S64" $(tensor_dtype bias) '=' INT64
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_004/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_004/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.recipe
new file mode 100644
index 000000000..9cf8a0f69
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.recipe
@@ -0,0 +1,44 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operation {
+ type: "Conv2D"
+ conv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_004/test.reverse b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_004/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.rule
new file mode 100644
index 000000000..ffa3bc906
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Conv_004/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "KER_U8" $(tensor_dtype ker) '=' UINT8
+RULE "BIAS_S32" $(tensor_dtype bias) '=' INT32
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.recipe
new file mode 100644
index 000000000..148256aa2
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.recipe
@@ -0,0 +1,49 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 2 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 2 dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "3" arg: "4"
+ arg: "-9" arg: "10" arg: "-11" arg: "12"
+ arg: "5" arg: "6" arg: "7" arg: "8"
+ arg: "13" arg: "-14" arg: "15" arg: "-16"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "3" arg: "4"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 1 dim: 4 }
+}
+operation {
+ type: "DepthwiseConv2D"
+ depthwiseconv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 2
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ depth_multiplier: 2
+ activation : RELU
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.rule b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.rule
new file mode 100644
index 000000000..50f235a55
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_000/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "KER_S16" $(tensor_dtype ker) '=' INT16
+RULE "BIAS_S64" $(tensor_dtype bias) '=' INT64
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.recipe
new file mode 100644
index 000000000..148256aa2
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.recipe
@@ -0,0 +1,49 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 2 dim: 2 }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 2 dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "3" arg: "4"
+ arg: "-9" arg: "10" arg: "-11" arg: "12"
+ arg: "5" arg: "6" arg: "7" arg: "8"
+ arg: "13" arg: "-14" arg: "15" arg: "-16"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "3" arg: "4"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 2 dim: 1 dim: 4 }
+}
+operation {
+ type: "DepthwiseConv2D"
+ depthwiseconv2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 2
+ dilation_w_factor: 1
+ dilation_h_factor: 1
+ depth_multiplier: 2
+ activation : RELU
+ }
+ input: "ifm"
+ input: "ker"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.rule b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.rule
new file mode 100644
index 000000000..ffa3bc906
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_DepthwiseConv2D_001/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "KER_U8" $(tensor_dtype ker) '=' UINT8
+RULE "BIAS_S32" $(tensor_dtype bias) '=' INT32
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.qconf.json
new file mode 100644
index 000000000..ad2bad697
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "out",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.recipe
new file mode 100644
index 000000000..0ecb5618b
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.recipe
@@ -0,0 +1,55 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 }
+}
+operand {
+ name: "weight"
+ type: FLOAT32
+ shape { dim: 4 dim: 16 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "-2" arg: "-3" arg: "4"
+ }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ }
+ input: "in"
+ input: "weight"
+ input: "bias"
+ output: "out"
+}
+input: "in"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.rule b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.rule
new file mode 100644
index 000000000..f54256084
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_000/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IN_U8" $(tensor_dtype in) '=' UINT8
+RULE "IN_QUANTIZE_S16" $(tensor_dtype in_Quantize) '=' INT16
+RULE "WEIGHT_S16" $(tensor_dtype weight) '=' INT16
+RULE "BIAS_S64" $(tensor_dtype bias) '=' INT64
+RULE "TARGET_S16" $(tensor_dtype out) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype out_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.qconf.json
new file mode 100644
index 000000000..ff3eb9791
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "out",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.recipe
new file mode 100644
index 000000000..0ecb5618b
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.recipe
@@ -0,0 +1,55 @@
+operand {
+ name: "in"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 }
+}
+operand {
+ name: "weight"
+ type: FLOAT32
+ shape { dim: 4 dim: 16 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ arg: "1" arg: "2" arg: "-3" arg: "-4"
+ arg: "-5" arg: "6" arg: "-7" arg: "8"
+ arg: "4" arg: "-2" arg: "3" arg: "-1"
+ arg: "-8" arg: "-6" arg: "7" arg: "5"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "-2" arg: "-3" arg: "4"
+ }
+}
+operand {
+ name: "out"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 }
+}
+operation {
+ type: "FullyConnected"
+ fullyconnected_options {
+ activation: NONE
+ }
+ input: "in"
+ input: "weight"
+ input: "bias"
+ output: "out"
+}
+input: "in"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.rule b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.rule
new file mode 100644
index 000000000..4acd22946
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_FullyConnected_001/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IN_S16" $(tensor_dtype in) '=' INT16
+RULE "IN_QUANTIZE_U8" $(tensor_dtype in_Quantize) '=' UINT8
+RULE "WEIGHT_U8" $(tensor_dtype weight) '=' UINT8
+RULE "BIAS_S32" $(tensor_dtype bias) '=' INT32
+RULE "TARGET_U8" $(tensor_dtype out) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype out_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.recipe
new file mode 100644
index 000000000..836a37305
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.recipe
@@ -0,0 +1,20 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "LeakyRelu"
+ leaky_relu_options {
+ alpha: 2.0
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.rule b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.recipe
new file mode 100644
index 000000000..836a37305
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.recipe
@@ -0,0 +1,20 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "LeakyRelu"
+ leaky_relu_options {
+ alpha: 2.0
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.rule b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_LeakyRelu_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.recipe
new file mode 100644
index 000000000..dca24da4c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Logistic"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.recipe
new file mode 100644
index 000000000..dca24da4c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Logistic"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Logistic_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.recipe
new file mode 100644
index 000000000..718630f08
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.recipe
@@ -0,0 +1,24 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 7 dim: 7 dim: 1 }
+}
+operation {
+ type: "MaxPool2D"
+ maxpool2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ filter_width: 2
+ filter_height: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.rule b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.recipe
new file mode 100644
index 000000000..718630f08
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.recipe
@@ -0,0 +1,24 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 7 dim: 7 dim: 1 }
+}
+operation {
+ type: "MaxPool2D"
+ maxpool2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ filter_width: 2
+ filter_height: 2
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.rule b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_MaxPool2D_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.recipe
new file mode 100644
index 000000000..d383997d3
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "-1" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operation {
+ type: "Mean"
+ mean_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.recipe
new file mode 100644
index 000000000..d383997d3
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 4 }
+}
+operand {
+ name: "reduction_indices"
+ type: INT32
+ shape { dim: 1 }
+ filler { tag: "explicit" arg: "-1" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 8 dim: 8 dim: 1 }
+}
+operation {
+ type: "Mean"
+ mean_options {
+ keep_dims: true
+ }
+ input: "ifm"
+ input: "reduction_indices"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Mean_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mean_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.recipe
new file mode 100644
index 000000000..43ca30dec
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Mul"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ mul_options {
+ activation: NONE
+ }
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.rule
new file mode 100644
index 000000000..e832ac526
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mul_000/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_U8" $(tensor_dtype ifm1) '=' UINT8
+RULE "IFM1_QUANTIZE_S16" $(tensor_dtype ifm1_Quantize) '=' INT16
+RULE "IFM2_U8" $(tensor_dtype ifm2) '=' UINT8
+RULE "IFM2_QUANTIZE_S16" $(tensor_dtype ifm2_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.recipe
new file mode 100644
index 000000000..43ca30dec
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ifm2"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "Mul"
+ input: "ifm1"
+ input: "ifm2"
+ output: "ofm"
+ mul_options {
+ activation: NONE
+ }
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Mul_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.rule
new file mode 100644
index 000000000..248337716
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Mul_001/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM1_S16" $(tensor_dtype ifm1) '=' INT16
+RULE "IFM1_QUANTIZE_U8" $(tensor_dtype ifm1_Quantize) '=' UINT8
+RULE "IFM2_S16" $(tensor_dtype ifm2) '=' INT16
+RULE "IFM2_QUANTIZE_U8" $(tensor_dtype ifm2_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.recipe
new file mode 100644
index 000000000..447e4a1ab
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Neg"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.recipe
new file mode 100644
index 000000000..447e4a1ab
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Neg"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Neg_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Neg_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.recipe
new file mode 100644
index 000000000..c18acdbbc
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "alpha"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "0.1" arg: "0.3" arg: "0.5"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "PRelu"
+ input: "ifm"
+ input: "alpha"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.rule b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.rule
new file mode 100644
index 000000000..81436146c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_000/test.rule
@@ -0,0 +1,12 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "ALPHA_S16" $(tensor_dtype alpha) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.recipe
new file mode 100644
index 000000000..c18acdbbc
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "alpha"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "0.1" arg: "0.3" arg: "0.5"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operation {
+ type: "PRelu"
+ input: "ifm"
+ input: "alpha"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.rule b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.rule
new file mode 100644
index 000000000..5b9416017
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_PRelu_001/test.rule
@@ -0,0 +1,12 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "ALPHA_U8" $(tensor_dtype alpha) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.recipe
new file mode 100644
index 000000000..2cc980b9c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "padding"
+ type: INT32
+ shape { dim: 4 dim: 2 }
+ filler {
+ tag: "explicit"
+ arg: "0" arg: "0"
+ arg: "1" arg: "1"
+ arg: "2" arg: "2"
+ arg: "0" arg: "0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 7 dim: 2 }
+}
+operation {
+ type: "Pad"
+ input: "ifm"
+ input: "padding"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.recipe
new file mode 100644
index 000000000..2cc980b9c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "padding"
+ type: INT32
+ shape { dim: 4 dim: 2 }
+ filler {
+ tag: "explicit"
+ arg: "0" arg: "0"
+ arg: "1" arg: "1"
+ arg: "2" arg: "2"
+ arg: "0" arg: "0"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 7 dim: 2 }
+}
+operation {
+ type: "Pad"
+ input: "ifm"
+ input: "padding"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Pad_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Pad_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.recipe
new file mode 100644
index 000000000..226593593
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU6"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.rule b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.recipe
new file mode 100644
index 000000000..226593593
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU6"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.rule b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU6_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.recipe
new file mode 100644
index 000000000..8eaa3602f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.rule b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.recipe
new file mode 100644
index 000000000..8eaa3602f
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.rule b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ReLU_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.recipe
new file mode 100644
index 000000000..cdca58980
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.recipe
@@ -0,0 +1,20 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 10 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 10 }
+}
+operation {
+ type: "Reshape"
+ reshape_options {
+ new_shape: 10
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.recipe
new file mode 100644
index 000000000..cdca58980
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.recipe
@@ -0,0 +1,20 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 1 dim: 10 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 10 }
+}
+operation {
+ type: "Reshape"
+ reshape_options {
+ new_shape: 10
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Reshape_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.recipe
new file mode 100644
index 000000000..3dd4c761c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "size"
+ type: INT32
+ shape { dim: 2 }
+ filler {
+ tag: "constant" arg: "16" arg: "16"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operation {
+ type: "ResizeBilinear"
+ input: "ifm1"
+ input: "size"
+ output: "ofm"
+ resize_bilinear_options {
+ align_corners: false
+ half_pixel_centers: false
+ }
+}
+input: "ifm1"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.rule b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.rule
new file mode 100644
index 000000000..3a3429d41
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm1) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm1_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.recipe
new file mode 100644
index 000000000..3dd4c761c
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.recipe
@@ -0,0 +1,30 @@
+operand {
+ name: "ifm1"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "size"
+ type: INT32
+ shape { dim: 2 }
+ filler {
+ tag: "constant" arg: "16" arg: "16"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 3 }
+}
+operation {
+ type: "ResizeBilinear"
+ input: "ifm1"
+ input: "size"
+ output: "ofm"
+ resize_bilinear_options {
+ align_corners: false
+ half_pixel_centers: false
+ }
+}
+input: "ifm1"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.rule b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.rule
new file mode 100644
index 000000000..2c5fcd5a3
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeBilinear_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm1) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm1_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.recipe
new file mode 100644
index 000000000..ef6b964c9
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 8 }
+}
+operand {
+ name: "size"
+ type: INT32
+ shape { dim: 2 }
+ filler { tag: "explicit" arg: "16" arg: "16" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 8 }
+}
+operation {
+ type: "ResizeNearestNeighbor"
+ resize_nearest_neighbor_options {
+ align_corners: true
+ }
+ input: "ifm"
+ input: "size"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.rule b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.recipe
new file mode 100644
index 000000000..ef6b964c9
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 8 }
+}
+operand {
+ name: "size"
+ type: INT32
+ shape { dim: 2 }
+ filler { tag: "explicit" arg: "16" arg: "16" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 8 }
+}
+operation {
+ type: "ResizeNearestNeighbor"
+ resize_nearest_neighbor_options {
+ align_corners: true
+ }
+ input: "ifm"
+ input: "size"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.rule b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_ResizeNearestNeighbor_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.recipe
new file mode 100644
index 000000000..2f9ccddfa
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.recipe
@@ -0,0 +1,37 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 3 dim: 2 dim: 3 }
+}
+operand {
+ name: "begin"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "0" arg: "0"
+ }
+}
+operand {
+ name: "size"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "1" arg: "3"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 3 }
+}
+operation {
+ type: "Slice"
+ input: "ifm"
+ input: "begin"
+ input: "size"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.recipe
new file mode 100644
index 000000000..2f9ccddfa
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.recipe
@@ -0,0 +1,37 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 3 dim: 2 dim: 3 }
+}
+operand {
+ name: "begin"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "0" arg: "0"
+ }
+}
+operand {
+ name: "size"
+ type: INT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "1" arg: "3"
+ }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 1 dim: 3 }
+}
+operation {
+ type: "Slice"
+ input: "ifm"
+ input: "begin"
+ input: "size"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Slice_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Slice_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.recipe
new file mode 100644
index 000000000..ce9abf555
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.recipe
@@ -0,0 +1,20 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Softmax"
+ softmax_options {
+ beta: 0.0
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.recipe
new file mode 100644
index 000000000..ce9abf555
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.recipe
@@ -0,0 +1,20 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Softmax"
+ softmax_options {
+ beta: 0.0
+ }
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Softmax_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.recipe
new file mode 100644
index 000000000..7bdf87d47
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Tanh"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.recipe
new file mode 100644
index 000000000..7bdf87d47
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.recipe
@@ -0,0 +1,17 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "Tanh"
+ input: "ifm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Tanh_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.recipe
new file mode 100644
index 000000000..c281b0482
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.recipe
@@ -0,0 +1,54 @@
+operand {
+ name: "out_shape"
+ type: INT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "4" arg: "4" arg: "3"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "3"
+ }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 1 dim: 3 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+
+operation {
+ type: "TransposeConv"
+ transpose_conv_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "out_shape"
+ input: "ker"
+ input: "ifm"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.rule b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.rule
new file mode 100644
index 000000000..50f235a55
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_000/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "KER_S16" $(tensor_dtype ker) '=' INT16
+RULE "BIAS_S64" $(tensor_dtype bias) '=' INT64
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.recipe
new file mode 100644
index 000000000..c281b0482
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.recipe
@@ -0,0 +1,54 @@
+operand {
+ name: "out_shape"
+ type: INT32
+ shape { dim: 4 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "4" arg: "4" arg: "3"
+ }
+}
+operand {
+ name: "bias"
+ type: FLOAT32
+ shape { dim: 3 }
+ filler {
+ tag: "explicit"
+ arg: "1" arg: "2" arg: "3"
+ }
+}
+operand {
+ name: "ker"
+ type: FLOAT32
+ shape { dim: 3 dim: 1 dim: 1 dim: 3 }
+ filler {
+ tag: "gaussian"
+ arg: "0.0"
+ arg: "1.0"
+ }
+}
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 4 dim: 4 dim: 3 }
+}
+
+operation {
+ type: "TransposeConv"
+ transpose_conv_options {
+ padding: SAME
+ stride_w: 1
+ stride_h: 1
+ }
+ input: "out_shape"
+ input: "ker"
+ input: "ifm"
+ input: "bias"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.rule b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.rule
new file mode 100644
index 000000000..ffa3bc906
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_TransposeConv_001/test.rule
@@ -0,0 +1,13 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "KER_U8" $(tensor_dtype ker) '=' UINT8
+RULE "BIAS_S32" $(tensor_dtype bias) '=' INT32
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.qconf.json
new file mode 100644
index 000000000..ab70bcc16
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "uint8",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "int16",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.recipe
new file mode 100644
index 000000000..82a85c13b
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 3 dim: 8 dim: 1 }
+}
+operand {
+ name: "perm"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "0" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 8 dim: 1 dim: 3 }
+}
+
+operation {
+ type: "Transpose"
+ transpose_options {
+ }
+ input: "ifm"
+ input: "perm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.reverse b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.rule
new file mode 100644
index 000000000..71f381e2d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_000/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: U8, Target Op dtype: S16
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_U8" $(tensor_dtype ifm) '=' UINT8
+RULE "IFM_QUANTIZE_S16" $(tensor_dtype ifm_Quantize) '=' INT16
+RULE "TARGET_S16" $(tensor_dtype ofm) '=' INT16
+RULE "OUTPUT_U8" $(tensor_dtype ofm_Quantize) '=' UINT8
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.qconf.json
new file mode 100644
index 000000000..010fa65fd
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.qconf.json
@@ -0,0 +1,11 @@
+{
+ "default_quantization_dtype" : "int16",
+ "default_granularity" : "channel",
+ "layers" : [
+ {
+ "name" : "ofm",
+ "dtype" : "uint8",
+ "granularity" : "channel"
+ }
+ ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.recipe
new file mode 100644
index 000000000..82a85c13b
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.recipe
@@ -0,0 +1,27 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 3 dim: 8 dim: 1 }
+}
+operand {
+ name: "perm"
+ type: INT32
+ shape { dim: 3 }
+ filler { tag: "explicit" arg: "1" arg: "2" arg: "0" }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 8 dim: 1 dim: 3 }
+}
+
+operation {
+ type: "Transpose"
+ transpose_options {
+ }
+ input: "ifm"
+ input: "perm"
+ output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.reverse b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.reverse
diff --git a/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.rule
new file mode 100644
index 000000000..b07ac58e8
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/Quant_Transpose_001/test.rule
@@ -0,0 +1,11 @@
+# To check mixed quantization.
+# Default dtype: S16, Target Op dtype: U8
+# Quantize Ops are inserted at the beginning/end of the model.
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "IFM_S16" $(tensor_dtype ifm) '=' INT16
+RULE "IFM_QUANTIZE_U8" $(tensor_dtype ifm_Quantize) '=' UINT8
+RULE "TARGET_U8" $(tensor_dtype ofm) '=' UINT8
+RULE "OUTPUT_S16" $(tensor_dtype ofm_Quantize) '=' INT16
+RULE "QUANTIZE_OP" $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/StridedSlice_004/test.recipe b/res/TensorFlowLiteRecipes/StridedSlice_004/test.recipe
new file mode 100644
index 000000000..edc8efd9d
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/StridedSlice_004/test.recipe
@@ -0,0 +1,46 @@
+#
+# Failed case from https://github.com/Samsung/ONE/issues/9439
+#
+operand {
+ name: "Placeholder"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 32 }
+ is_variable: false
+}
+operand {
+ name: "strided_slice/stack_2"
+ type: INT32
+ shape { dim: 4 }
+ filler { tag: "explicit" arg: "1" arg: "-1" arg: "1" arg: "1" }
+ is_variable: false
+}
+operand {
+ name: "strided_slice/stack"
+ type: INT32
+ shape { dim: 4 }
+ filler { tag: "explicit" arg: "0" arg: "0" arg: "0" arg: "0" }
+ is_variable: false
+}
+operand {
+ name: "strided_slice"
+ type: FLOAT32
+ shape { dim: 1 dim: 16 dim: 16 dim: 32 }
+ is_variable: false
+}
+operation {
+ type: "StridedSlice"
+ input: "Placeholder"
+ input: "strided_slice/stack"
+ input: "strided_slice/stack"
+ input: "strided_slice/stack_2"
+ output: "strided_slice"
+ strided_slice_options {
+ begin_mask: 15
+ end_mask: 15
+ ellipsis_mask: 0
+ new_axis_mask: 0
+ shrink_axis_mask: 0
+ }
+}
+input: "Placeholder"
+output: "strided_slice"
diff --git a/res/TensorFlowLiteRecipes/StridedSlice_004/test.reverse b/res/TensorFlowLiteRecipes/StridedSlice_004/test.reverse
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/StridedSlice_004/test.reverse
diff --git a/res/TensorFlowPythonExamples/examples/AddV2/__init__.py b/res/TensorFlowPythonExamples/examples/AddV2/__init__.py
index 8114c50b1..0cfa27e43 100644
--- a/res/TensorFlowPythonExamples/examples/AddV2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/AddV2/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.raw_ops.AddV2(x=lhs_, y=rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/BatchMatMulV2/__init__.py b/res/TensorFlowPythonExamples/examples/BatchMatMulV2/__init__.py
index b9f7a1cc1..5f851cb21 100644
--- a/res/TensorFlowPythonExamples/examples/BatchMatMulV2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/BatchMatMulV2/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 5, 4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.raw_ops.BatchMatMulV2(x=lhs_, y=rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/Bidirectional_LSTM/__init__.py b/res/TensorFlowPythonExamples/examples/Bidirectional_LSTM/__init__.py
index d28034bf9..b4f0297a3 100644
--- a/res/TensorFlowPythonExamples/examples/Bidirectional_LSTM/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/Bidirectional_LSTM/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[28, 28, 3], name="Hole")
op_uni_ = tf.compat.v1.keras.layers.LSTM(1, time_major=False, return_sequences=True)
diff --git a/res/TensorFlowPythonExamples/examples/PadV2/__init__.py b/res/TensorFlowPythonExamples/examples/PadV2/__init__.py
index 99940bf85..995efd5ee 100644
--- a/res/TensorFlowPythonExamples/examples/PadV2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/PadV2/__init__.py
@@ -1,6 +1,8 @@
import tensorflow as tf
import numpy as np
+tf.compat.v1.disable_eager_execution()
+
input_ = tf.compat.v1.placeholder(shape=[1, 1, 1, 1], dtype=tf.float32)
paddings_ = tf.compat.v1.constant(
np.array([[1, 1], [2, 2], [3, 3], [4, 4]], dtype=np.int32))
diff --git a/res/TensorFlowPythonExamples/examples/abs/__init__.py b/res/TensorFlowPythonExamples/examples/abs/__init__.py
index fd5515595..83ac3cb33 100755
--- a/res/TensorFlowPythonExamples/examples/abs/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/abs/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
abs_ = tf.compat.v1.abs(in_)
diff --git a/res/TensorFlowPythonExamples/examples/add/__init__.py b/res/TensorFlowPythonExamples/examples/add/__init__.py
index 7e283f35f..39790a0e5 100755
--- a/res/TensorFlowPythonExamples/examples/add/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/add/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.add(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/add_n/__init__.py b/res/TensorFlowPythonExamples/examples/add_n/__init__.py
index afd068d0d..c8e23c940 100644
--- a/res/TensorFlowPythonExamples/examples/add_n/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/add_n/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in1_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
in2_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
in3_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/argmax/__init__.py b/res/TensorFlowPythonExamples/examples/argmax/__init__.py
index 059df97f9..b8791b46e 100755
--- a/res/TensorFlowPythonExamples/examples/argmax/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/argmax/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.math.argmax(in_)
diff --git a/res/TensorFlowPythonExamples/examples/argmin/__init__.py b/res/TensorFlowPythonExamples/examples/argmin/__init__.py
index f9a54627f..39f3278a5 100644
--- a/res/TensorFlowPythonExamples/examples/argmin/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/argmin/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.math.argmin(in_)
diff --git a/res/TensorFlowPythonExamples/examples/atrous_conv2d/__init__.py b/res/TensorFlowPythonExamples/examples/atrous_conv2d/__init__.py
index 90756b0b0..c430749f3 100644
--- a/res/TensorFlowPythonExamples/examples/atrous_conv2d/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/atrous_conv2d/__init__.py
@@ -1,6 +1,8 @@
import tensorflow as tf
import numpy as np
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 32, 32, 3), name="Hole")
filters = np.random.uniform(low=-1., high=1, size=[5, 5, 3, 32]).astype(np.float32)
diff --git a/res/TensorFlowPythonExamples/examples/average_pool_2d/__init__.py b/res/TensorFlowPythonExamples/examples/average_pool_2d/__init__.py
index a8ab0ddc4..814cf5787 100644
--- a/res/TensorFlowPythonExamples/examples/average_pool_2d/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/average_pool_2d/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 8, 8, 1), name="Hole")
op_ = tf.compat.v1.nn.avg_pool2d(in_, (2, 2), 1, "VALID")
diff --git a/res/TensorFlowPythonExamples/examples/batch_normalization/__init__.py b/res/TensorFlowPythonExamples/examples/batch_normalization/__init__.py
index e86555220..4a7787073 100644
--- a/res/TensorFlowPythonExamples/examples/batch_normalization/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/batch_normalization/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
mean = tf.compat.v1.constant([1., 2., 3.])
variance = tf.compat.v1.constant([4., 5., 6.])
offset = tf.compat.v1.constant([7., 8., 9.])
diff --git a/res/TensorFlowPythonExamples/examples/batch_to_space/__init__.py b/res/TensorFlowPythonExamples/examples/batch_to_space/__init__.py
index 1dd08b0ee..9efa85c2d 100644
--- a/res/TensorFlowPythonExamples/examples/batch_to_space/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/batch_to_space/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=[4, 1, 1, 1], name="Hole")
cr_ = tf.constant([[0, 0], [0, 0]], name="Hole")
op_ = tf.batch_to_space(in_, cr_, 2)
diff --git a/res/TensorFlowPythonExamples/examples/biasadd/__init__.py b/res/TensorFlowPythonExamples/examples/biasadd/__init__.py
index eb8a69bc3..72ffe10ae 100755
--- a/res/TensorFlowPythonExamples/examples/biasadd/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/biasadd/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1, 2, 3), name="Hole")
op_ = tf.nn.bias_add(in_, bias=[1.0, 1.0, -1.0], data_format="NHWC")
diff --git a/res/TensorFlowPythonExamples/examples/cast/__init__.py b/res/TensorFlowPythonExamples/examples/cast/__init__.py
index 4c0adc09f..5919e0de2 100644
--- a/res/TensorFlowPythonExamples/examples/cast/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/cast/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
cast_ = tf.cast(in_, tf.int32)
diff --git a/res/TensorFlowPythonExamples/examples/ceil/__init__.py b/res/TensorFlowPythonExamples/examples/ceil/__init__.py
index 5178f8fe8..79737c8ab 100755
--- a/res/TensorFlowPythonExamples/examples/ceil/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/ceil/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.ceil(in_)
diff --git a/res/TensorFlowPythonExamples/examples/concat/__init__.py b/res/TensorFlowPythonExamples/examples/concat/__init__.py
index ec59b242f..c1c7b1aeb 100644
--- a/res/TensorFlowPythonExamples/examples/concat/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/concat/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in1_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4), name="Hole1")
in2_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 2, 4), name="Hole2")
concat_ = tf.compat.v1.concat([in1_, in2_], axis=-2)
diff --git a/res/TensorFlowPythonExamples/examples/cond/__init__.py b/res/TensorFlowPythonExamples/examples/cond/__init__.py
index deafbb162..660ec9b84 100644
--- a/res/TensorFlowPythonExamples/examples/cond/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/cond/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
x_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleX")
y_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleY")
z_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleZ")
diff --git a/res/TensorFlowPythonExamples/examples/cond_1/__init__.py b/res/TensorFlowPythonExamples/examples/cond_1/__init__.py
index fed192018..da8809482 100644
--- a/res/TensorFlowPythonExamples/examples/cond_1/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/cond_1/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
x_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleX")
y_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleY")
z_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[], name="HoleZ")
diff --git a/res/TensorFlowPythonExamples/examples/conv2d_1/__init__.py b/res/TensorFlowPythonExamples/examples/conv2d_1/__init__.py
index fa4f72f99..7cf8dee52 100644
--- a/res/TensorFlowPythonExamples/examples/conv2d_1/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/conv2d_1/__init__.py
@@ -1,6 +1,8 @@
import tensorflow as tf
import numpy as np
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 32, 32, 3), name="Hole")
filters = np.random.uniform(low=-1., high=1, size=[5, 5, 3, 32]).astype(np.float32)
diff --git a/res/TensorFlowPythonExamples/examples/conv2d_2/__init__.py b/res/TensorFlowPythonExamples/examples/conv2d_2/__init__.py
index 680bb36fd..812fef12b 100644
--- a/res/TensorFlowPythonExamples/examples/conv2d_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/conv2d_2/__init__.py
@@ -1,6 +1,8 @@
import tensorflow as tf
import numpy as np
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 32, 32, 3), name="Hole")
filters = np.random.uniform(low=-1., high=1, size=[5, 5, 3, 32]).astype(np.float32)
diff --git a/res/TensorFlowPythonExamples/examples/conv2d_transpose/__init__.py b/res/TensorFlowPythonExamples/examples/conv2d_transpose/__init__.py
index 17fd6e20a..cd317cee9 100644
--- a/res/TensorFlowPythonExamples/examples/conv2d_transpose/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/conv2d_transpose/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
input_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 8, 8, 1), name="Hole")
kernel_ = tf.compat.v1.placeholder(tf.float32, shape=(3, 3, 1, 1), name="Hole")
op_ = tf.compat.v1.nn.conv2d_transpose(
diff --git a/res/TensorFlowPythonExamples/examples/cos/__init__.py b/res/TensorFlowPythonExamples/examples/cos/__init__.py
index cfce5d830..3271ddb96 100755
--- a/res/TensorFlowPythonExamples/examples/cos/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/cos/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.cos(in_)
diff --git a/res/TensorFlowPythonExamples/examples/depth_to_space/__init__.py b/res/TensorFlowPythonExamples/examples/depth_to_space/__init__.py
index 0cbc304fa..c11766ed0 100644
--- a/res/TensorFlowPythonExamples/examples/depth_to_space/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/depth_to_space/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=[1, 1, 1, 4], name="Hole")
op_ = tf.nn.depth_to_space(in_, 2)
diff --git a/res/TensorFlowPythonExamples/examples/depthwise_conv2d_1/__init__.py b/res/TensorFlowPythonExamples/examples/depthwise_conv2d_1/__init__.py
index 7df1938cc..a9c8b33eb 100644
--- a/res/TensorFlowPythonExamples/examples/depthwise_conv2d_1/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/depthwise_conv2d_1/__init__.py
@@ -1,6 +1,8 @@
import tensorflow as tf
import numpy as np
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 32, 32, 4), name="Hole")
filters = np.array(
diff --git a/res/TensorFlowPythonExamples/examples/depthwise_conv2d_2/__init__.py b/res/TensorFlowPythonExamples/examples/depthwise_conv2d_2/__init__.py
index 4800ebd82..8fbd0da49 100644
--- a/res/TensorFlowPythonExamples/examples/depthwise_conv2d_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/depthwise_conv2d_2/__init__.py
@@ -1,6 +1,8 @@
import tensorflow as tf
import numpy as np
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=(1, 32, 32, 4), name="Hole")
filters = np.array(
diff --git a/res/TensorFlowPythonExamples/examples/div/__init__.py b/res/TensorFlowPythonExamples/examples/div/__init__.py
index 2887771ff..9acf9166b 100755
--- a/res/TensorFlowPythonExamples/examples/div/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/div/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.div(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/elu/__init__.py b/res/TensorFlowPythonExamples/examples/elu/__init__.py
index b41f65111..91c620927 100755
--- a/res/TensorFlowPythonExamples/examples/elu/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/elu/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
elu_ = tf.compat.v1.nn.elu(in_)
diff --git a/res/TensorFlowPythonExamples/examples/exp/__init__.py b/res/TensorFlowPythonExamples/examples/exp/__init__.py
index e83638436..5a7c88d8c 100644
--- a/res/TensorFlowPythonExamples/examples/exp/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/exp/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.exp(in_)
diff --git a/res/TensorFlowPythonExamples/examples/expand_dims_00/__init__.py b/res/TensorFlowPythonExamples/examples/expand_dims_00/__init__.py
index ab6a87fc7..1f99c1107 100644
--- a/res/TensorFlowPythonExamples/examples/expand_dims_00/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/expand_dims_00/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
# example 1 where input has all known dims and axis is const
in_ = tf.compat.v1.placeholder(dtype=tf.int32, shape=(2, 3), name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/expand_dims_01/__init__.py b/res/TensorFlowPythonExamples/examples/expand_dims_01/__init__.py
index 36c54753b..1b1626a32 100644
--- a/res/TensorFlowPythonExamples/examples/expand_dims_01/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/expand_dims_01/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
# example 2 where input has unknown dim and axis is const
in_ = tf.compat.v1.placeholder(dtype=tf.int32, shape=(None, None), name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/expand_dims_02/__init__.py b/res/TensorFlowPythonExamples/examples/expand_dims_02/__init__.py
index 6304c2344..c73b0ba2f 100644
--- a/res/TensorFlowPythonExamples/examples/expand_dims_02/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/expand_dims_02/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
# example 3 where input has all known dim and axis is not const
in_ = tf.compat.v1.placeholder(dtype=tf.int32, shape=(2, 3), name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/fill/__init__.py b/res/TensorFlowPythonExamples/examples/fill/__init__.py
index f8413bb36..1c9d20476 100644
--- a/res/TensorFlowPythonExamples/examples/fill/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/fill/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.int32, shape=(), name="Hole")
op_ = tf.compat.v1.fill((3, 4), in_)
diff --git a/res/TensorFlowPythonExamples/examples/flatten/__init__.py b/res/TensorFlowPythonExamples/examples/flatten/__init__.py
index bb6dbaa2b..3f135688e 100644
--- a/res/TensorFlowPythonExamples/examples/flatten/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/flatten/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(3, 3), name="Hole")
op_ = tf.compat.v1.layers.flatten(in_)
diff --git a/res/TensorFlowPythonExamples/examples/floor/__init__.py b/res/TensorFlowPythonExamples/examples/floor/__init__.py
index 3b3f5bfc3..0357cee3b 100755
--- a/res/TensorFlowPythonExamples/examples/floor/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/floor/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.floor(in_)
diff --git a/res/TensorFlowPythonExamples/examples/floordiv/__init__.py b/res/TensorFlowPythonExamples/examples/floordiv/__init__.py
index 34f413f2b..5714bf563 100755
--- a/res/TensorFlowPythonExamples/examples/floordiv/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/floordiv/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.floordiv(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/floormod/__init__.py b/res/TensorFlowPythonExamples/examples/floormod/__init__.py
index c06e2a9ed..f4e1a5f33 100644
--- a/res/TensorFlowPythonExamples/examples/floormod/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/floormod/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.floormod(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/fused_batch_norm/__init__.py b/res/TensorFlowPythonExamples/examples/fused_batch_norm/__init__.py
index 5e13b0d82..628420c3b 100644
--- a/res/TensorFlowPythonExamples/examples/fused_batch_norm/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/fused_batch_norm/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
scale = tf.compat.v1.constant([1., 2., 3.])
offset = tf.compat.v1.constant([4., 5., 6.])
mean = tf.constant([1., 2., 3.])
diff --git a/res/TensorFlowPythonExamples/examples/gather/__init__.py b/res/TensorFlowPythonExamples/examples/gather/__init__.py
index 173be4a97..67b4d07fc 100644
--- a/res/TensorFlowPythonExamples/examples/gather/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/gather/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
param_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 2, 3, 4), name="Hole")
indices_ = tf.constant([1, 2])
op_ = tf.gather(param_, indices_, axis=2)
diff --git a/res/TensorFlowPythonExamples/examples/gather_nd/__init__.py b/res/TensorFlowPythonExamples/examples/gather_nd/__init__.py
index 1ff11d568..8c0df3629 100644
--- a/res/TensorFlowPythonExamples/examples/gather_nd/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/gather_nd/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
param_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 2, 2), name="Hole")
indices_ = tf.constant([[0, 1], [1, 0]])
op_ = tf.gather_nd(param_, indices_)
diff --git a/res/TensorFlowPythonExamples/examples/greater/__init__.py b/res/TensorFlowPythonExamples/examples/greater/__init__.py
index e88f57471..b8578e3b2 100755
--- a/res/TensorFlowPythonExamples/examples/greater/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/greater/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.greater(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/greater_equal/__init__.py b/res/TensorFlowPythonExamples/examples/greater_equal/__init__.py
index b15fbd324..cf10e4d4e 100755
--- a/res/TensorFlowPythonExamples/examples/greater_equal/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/greater_equal/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.greater_equal(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/gru/__init__.py b/res/TensorFlowPythonExamples/examples/gru/__init__.py
index 26ee75d2e..0d4718937 100755
--- a/res/TensorFlowPythonExamples/examples/gru/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/gru/__init__.py
@@ -1,6 +1,8 @@
import tensorflow as tf
from tensorflow import keras
+tf.compat.v1.disable_eager_execution()
+
model = keras.Sequential()
shape = (4, 4)
model.add(keras.layers.GRU(2, input_shape=shape))
diff --git a/res/TensorFlowPythonExamples/examples/instance_norm/__init__.py b/res/TensorFlowPythonExamples/examples/instance_norm/__init__.py
index b44942c39..62a774e4a 100644
--- a/res/TensorFlowPythonExamples/examples/instance_norm/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/instance_norm/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
sess = tf.Session()
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(3, 3), name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/l2_normalize/__init__.py b/res/TensorFlowPythonExamples/examples/l2_normalize/__init__.py
index 0dda6bfc8..fe26e0684 100644
--- a/res/TensorFlowPythonExamples/examples/l2_normalize/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/l2_normalize/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
arg = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.math.l2_normalize(arg)
diff --git a/res/TensorFlowPythonExamples/examples/leaky_relu/__init__.py b/res/TensorFlowPythonExamples/examples/leaky_relu/__init__.py
index d595edbd0..c1899de56 100755
--- a/res/TensorFlowPythonExamples/examples/leaky_relu/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/leaky_relu/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.nn.leaky_relu(in_)
diff --git a/res/TensorFlowPythonExamples/examples/less/__init__.py b/res/TensorFlowPythonExamples/examples/less/__init__.py
index 41ba18c62..6fee74aa5 100755
--- a/res/TensorFlowPythonExamples/examples/less/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/less/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.less(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/less_equal/__init__.py b/res/TensorFlowPythonExamples/examples/less_equal/__init__.py
index d60bf2a73..fdca6490a 100755
--- a/res/TensorFlowPythonExamples/examples/less_equal/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/less_equal/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.less_equal(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/local_response_normalization/__init__.py b/res/TensorFlowPythonExamples/examples/local_response_normalization/__init__.py
index eca6b2267..c358bd06e 100644
--- a/res/TensorFlowPythonExamples/examples/local_response_normalization/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/local_response_normalization/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
x_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 4, 4, 20), name="Hole")
op_ = tf.compat.v1.nn.lrn(x_, 5, 1.0, 1.0, 0.5)
diff --git a/res/TensorFlowPythonExamples/examples/log/__init__.py b/res/TensorFlowPythonExamples/examples/log/__init__.py
index cb206c058..d8787ef7d 100644
--- a/res/TensorFlowPythonExamples/examples/log/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/log/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.math.log(in_)
diff --git a/res/TensorFlowPythonExamples/examples/log_softmax/__init__.py b/res/TensorFlowPythonExamples/examples/log_softmax/__init__.py
index 651888c71..a13f211c7 100644
--- a/res/TensorFlowPythonExamples/examples/log_softmax/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/log_softmax/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.nn.log_softmax(in_)
diff --git a/res/TensorFlowPythonExamples/examples/log_softmax_2/__init__.py b/res/TensorFlowPythonExamples/examples/log_softmax_2/__init__.py
index c3d458942..856ebd968 100644
--- a/res/TensorFlowPythonExamples/examples/log_softmax_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/log_softmax_2/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4, 5), name="Hole")
op_ = tf.compat.v1.nn.log_softmax(in_, axis=1)
diff --git a/res/TensorFlowPythonExamples/examples/logical_and/__init__.py b/res/TensorFlowPythonExamples/examples/logical_and/__init__.py
index f546fae9f..d0c4ea2ac 100755
--- a/res/TensorFlowPythonExamples/examples/logical_and/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/logical_and/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.logical_and(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/logical_not/__init__.py b/res/TensorFlowPythonExamples/examples/logical_not/__init__.py
index f1bcc2c8f..532d5ff1f 100755
--- a/res/TensorFlowPythonExamples/examples/logical_not/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/logical_not/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.logical_not(in_)
diff --git a/res/TensorFlowPythonExamples/examples/logical_or/__init__.py b/res/TensorFlowPythonExamples/examples/logical_or/__init__.py
index 991d61ab9..ce584eaf4 100755
--- a/res/TensorFlowPythonExamples/examples/logical_or/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/logical_or/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.logical_or(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/lstm/__init__.py b/res/TensorFlowPythonExamples/examples/lstm/__init__.py
index c07948bde..99ef3c27f 100755
--- a/res/TensorFlowPythonExamples/examples/lstm/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/lstm/__init__.py
@@ -1,6 +1,8 @@
import tensorflow as tf
from tensorflow import keras
+tf.compat.v1.disable_eager_execution()
+
model = keras.Sequential()
shape = (4, 4)
model.add(keras.layers.LSTM(2, input_shape=shape))
diff --git a/res/TensorFlowPythonExamples/examples/matmul/__init__.py b/res/TensorFlowPythonExamples/examples/matmul/__init__.py
index 760241de7..6f049e50a 100755
--- a/res/TensorFlowPythonExamples/examples/matmul/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/matmul/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(3, 4), name="Hole")
rhs_ = tf.compat.v1.constant(dtype=tf.float32, shape=(4, 4), name="Hole", value=1.0)
op_ = tf.compat.v1.matmul(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/matrix_band_part/__init__.py b/res/TensorFlowPythonExamples/examples/matrix_band_part/__init__.py
index 43d4d8754..a708f35c4 100644
--- a/res/TensorFlowPythonExamples/examples/matrix_band_part/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/matrix_band_part/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.matrix_band_part(in_, 1, -1)
diff --git a/res/TensorFlowPythonExamples/examples/matrix_diag/__init__.py b/res/TensorFlowPythonExamples/examples/matrix_diag/__init__.py
index 384a29853..cd789eaca 100644
--- a/res/TensorFlowPythonExamples/examples/matrix_diag/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/matrix_diag/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.matrix_diag(in_)
diff --git a/res/TensorFlowPythonExamples/examples/matrix_set_diag/__init__.py b/res/TensorFlowPythonExamples/examples/matrix_set_diag/__init__.py
index e8878f02f..55b869037 100644
--- a/res/TensorFlowPythonExamples/examples/matrix_set_diag/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/matrix_set_diag/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4), name="Hole")
diag_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3), name="Hole")
op_ = tf.compat.v1.matrix_set_diag(in_, diag_)
diff --git a/res/TensorFlowPythonExamples/examples/max_pool_with_argmax/__init__.py b/res/TensorFlowPythonExamples/examples/max_pool_with_argmax/__init__.py
index 487858cc5..78daa034c 100755
--- a/res/TensorFlowPythonExamples/examples/max_pool_with_argmax/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/max_pool_with_argmax/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 4, 4, 1), name="Hole")
op_ = tf.compat.v1.nn.max_pool_with_argmax(
in_, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding="VALID")
diff --git a/res/TensorFlowPythonExamples/examples/maximum/__init__.py b/res/TensorFlowPythonExamples/examples/maximum/__init__.py
index a96fe03a7..0656ba4e6 100755
--- a/res/TensorFlowPythonExamples/examples/maximum/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/maximum/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.maximum(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/minimum/__init__.py b/res/TensorFlowPythonExamples/examples/minimum/__init__.py
index ef664dbf6..ebd795e38 100755
--- a/res/TensorFlowPythonExamples/examples/minimum/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/minimum/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.minimum(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/multiply/__init__.py b/res/TensorFlowPythonExamples/examples/multiply/__init__.py
index da8885660..68dff1e61 100755
--- a/res/TensorFlowPythonExamples/examples/multiply/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/multiply/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.multiply(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/negative/__init__.py b/res/TensorFlowPythonExamples/examples/negative/__init__.py
index 86713da7b..473dc9b97 100644
--- a/res/TensorFlowPythonExamples/examples/negative/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/negative/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
neg_ = tf.math.negative(in_)
diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py
index b8f010c67..2598b531b 100644
--- a/res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/non_max_suppression_padded/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
max_output_size = tf.compat.v1.constant(4)
in_boxes_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(8, 4), name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py
index 42e7bf06c..932ad3534 100644
--- a/res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/non_max_suppression_padded_2/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
max_output_size = tf.compat.v1.constant(6)
iou_threshold = tf.compat.v1.constant(0.5)
score_threshold = tf.compat.v1.constant(0.6)
diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py
index 32c6173b0..c251b9271 100644
--- a/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
max_output_size = tf.compat.v1.constant(4)
in_boxes_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(8, 4), name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py b/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py
index 415f9209f..a7185c3ee 100644
--- a/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/non_max_suppression_with_scores_2/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
max_output_size = tf.compat.v1.constant(6)
iou_threshold = tf.compat.v1.constant(0.5)
score_threshold = tf.compat.v1.constant(0.6)
diff --git a/res/TensorFlowPythonExamples/examples/not_equal/__init__.py b/res/TensorFlowPythonExamples/examples/not_equal/__init__.py
index 95073fe4a..955eb1f9f 100755
--- a/res/TensorFlowPythonExamples/examples/not_equal/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/not_equal/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.not_equal(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/one_hot/__init__.py b/res/TensorFlowPythonExamples/examples/one_hot/__init__.py
index 49e0346d3..b99bb9ca0 100644
--- a/res/TensorFlowPythonExamples/examples/one_hot/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/one_hot/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
indice_ = tf.compat.v1.placeholder(tf.int32, shape=(1, 2, 3, 4), name='Hole')
depth_ = tf.compat.v1.placeholder(tf.int32, shape=(), name='Hole')
on_value_ = tf.compat.v1.placeholder(tf.int32, shape=(), name='Hole')
diff --git a/res/TensorFlowPythonExamples/examples/pack/__init__.py b/res/TensorFlowPythonExamples/examples/pack/__init__.py
index 609bc9b76..4f1c46baa 100755
--- a/res/TensorFlowPythonExamples/examples/pack/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/pack/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_1 = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4), name="Hole")
in_2 = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4), name="Hole")
op_ = tf.compat.v1.stack([in_1, in_2])
diff --git a/res/TensorFlowPythonExamples/examples/pad-reflect/__init__.py b/res/TensorFlowPythonExamples/examples/pad-reflect/__init__.py
index dc877f119..a78e21571 100644
--- a/res/TensorFlowPythonExamples/examples/pad-reflect/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/pad-reflect/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
tensor_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3), name="Hole")
paddings_ = tf.constant([[1, 1], [2, 2]], name="Hole")
op_ = tf.pad(tensor_, paddings_, "REFLECT")
diff --git a/res/TensorFlowPythonExamples/examples/pad/__init__.py b/res/TensorFlowPythonExamples/examples/pad/__init__.py
index ac5cf81fa..7097b7592 100755
--- a/res/TensorFlowPythonExamples/examples/pad/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/pad/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
tensor_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3), name="Hole")
paddings_ = tf.compat.v1.constant([[1, 1], [2, 2]], name="Hole")
op_ = tf.compat.v1.pad(tensor_, paddings_)
diff --git a/res/TensorFlowPythonExamples/examples/pow/__init__.py b/res/TensorFlowPythonExamples/examples/pow/__init__.py
index 960032a84..12a19f2b0 100755
--- a/res/TensorFlowPythonExamples/examples/pow/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/pow/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.pow(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/prelu/__init__.py b/res/TensorFlowPythonExamples/examples/prelu/__init__.py
index b0e7c7b9d..7e43f5101 100644
--- a/res/TensorFlowPythonExamples/examples/prelu/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/prelu/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
input_tensor = tf.compat.v1.placeholder(
dtype=tf.float32, name="input", shape=[1, 4, 4, 3])
prelu = tf.keras.layers.PReLU(shared_axes=[1, 2])
diff --git a/res/TensorFlowPythonExamples/examples/range/__init__.py b/res/TensorFlowPythonExamples/examples/range/__init__.py
index 0f032e9d1..9b57167b0 100644
--- a/res/TensorFlowPythonExamples/examples/range/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/range/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
# this modified example comes from TF API reference
start = 1
limit = 10
diff --git a/res/TensorFlowPythonExamples/examples/rank/__init__.py b/res/TensorFlowPythonExamples/examples/rank/__init__.py
index c9b970718..ab2bc79dc 100644
--- a/res/TensorFlowPythonExamples/examples/rank/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/rank/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4, 3, 3), name="Hole")
rank_ = tf.compat.v1.rank(in_)
diff --git a/res/TensorFlowPythonExamples/examples/reduce_all/__init__.py b/res/TensorFlowPythonExamples/examples/reduce_all/__init__.py
index eb9167f72..2fee752d4 100644
--- a/res/TensorFlowPythonExamples/examples/reduce_all/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reduce_all/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
input_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(2, 4), name="Hole")
op_ = tf.compat.v1.reduce_all(input_, axis=1, keepdims=False)
diff --git a/res/TensorFlowPythonExamples/examples/reduce_any/__init__.py b/res/TensorFlowPythonExamples/examples/reduce_any/__init__.py
index f87c25166..0e87a0c6e 100644
--- a/res/TensorFlowPythonExamples/examples/reduce_any/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reduce_any/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=(2, 2), name="Hole")
op_ = tf.compat.v1.math.reduce_any(in_)
diff --git a/res/TensorFlowPythonExamples/examples/reduce_max/__init__.py b/res/TensorFlowPythonExamples/examples/reduce_max/__init__.py
index 27e48df72..dc5e0d648 100644
--- a/res/TensorFlowPythonExamples/examples/reduce_max/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reduce_max/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 2), name="Hole")
op_ = tf.compat.v1.math.reduce_max(in_)
diff --git a/res/TensorFlowPythonExamples/examples/reduce_min/__init__.py b/res/TensorFlowPythonExamples/examples/reduce_min/__init__.py
index b3cf0346a..fe81336d4 100644
--- a/res/TensorFlowPythonExamples/examples/reduce_min/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reduce_min/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 2), name="Hole")
op_ = tf.compat.v1.math.reduce_min(in_)
diff --git a/res/TensorFlowPythonExamples/examples/reduce_prod/__init__.py b/res/TensorFlowPythonExamples/examples/reduce_prod/__init__.py
index 4d134ae32..9fe2ee295 100644
--- a/res/TensorFlowPythonExamples/examples/reduce_prod/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reduce_prod/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 2), name="Hole")
op_ = tf.compat.v1.math.reduce_prod(in_)
diff --git a/res/TensorFlowPythonExamples/examples/relu/__init__.py b/res/TensorFlowPythonExamples/examples/relu/__init__.py
index a144a1212..69e075332 100755
--- a/res/TensorFlowPythonExamples/examples/relu/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/relu/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.nn.relu(in_)
diff --git a/res/TensorFlowPythonExamples/examples/relu6/__init__.py b/res/TensorFlowPythonExamples/examples/relu6/__init__.py
index f58ae7c2c..d581d3936 100755
--- a/res/TensorFlowPythonExamples/examples/relu6/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/relu6/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.nn.relu6(in_)
diff --git a/res/TensorFlowPythonExamples/examples/reshape/__init__.py b/res/TensorFlowPythonExamples/examples/reshape/__init__.py
index f451bacb9..c60c0a6d8 100644
--- a/res/TensorFlowPythonExamples/examples/reshape/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reshape/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.reshape(in_, shape=[2, 2, 2, 2])
diff --git a/res/TensorFlowPythonExamples/examples/resize_bilinear/__init__.py b/res/TensorFlowPythonExamples/examples/resize_bilinear/__init__.py
index 422bf1db5..773fc07c9 100755
--- a/res/TensorFlowPythonExamples/examples/resize_bilinear/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/resize_bilinear/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 8, 8, 3), name="Hole")
op_ = tf.compat.v1.image.resize_bilinear(in_, [16, 16])
diff --git a/res/TensorFlowPythonExamples/examples/resize_nearest_neighbor/__init__.py b/res/TensorFlowPythonExamples/examples/resize_nearest_neighbor/__init__.py
index a14022948..3e688d328 100755
--- a/res/TensorFlowPythonExamples/examples/resize_nearest_neighbor/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/resize_nearest_neighbor/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 8, 8, 3), name="Hole")
op_ = tf.compat.v1.image.resize_nearest_neighbor(in_, [16, 16])
diff --git a/res/TensorFlowPythonExamples/examples/reverse_sequence/__init__.py b/res/TensorFlowPythonExamples/examples/reverse_sequence/__init__.py
index aebd4fc50..4b7a9cf26 100755
--- a/res/TensorFlowPythonExamples/examples/reverse_sequence/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reverse_sequence/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 8), name="Hole")
op_ = tf.compat.v1.reverse_sequence(in_, [7, 2, 3, 5], seq_axis=1, batch_axis=0)
diff --git a/res/TensorFlowPythonExamples/examples/reverse_v2/__init__.py b/res/TensorFlowPythonExamples/examples/reverse_v2/__init__.py
index e6afc995c..0404cd660 100755
--- a/res/TensorFlowPythonExamples/examples/reverse_v2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/reverse_v2/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3, 4, 5), name="Hole")
op_ = tf.compat.v1.reverse_v2(in_, [3, 2])
diff --git a/res/TensorFlowPythonExamples/examples/rnn/__init__.py b/res/TensorFlowPythonExamples/examples/rnn/__init__.py
index 5e76951c2..9c1e69c2e 100755
--- a/res/TensorFlowPythonExamples/examples/rnn/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/rnn/__init__.py
@@ -1,6 +1,8 @@
import tensorflow as tf
from tensorflow import keras
+tf.compat.v1.disable_eager_execution()
+
model = keras.Sequential()
shape = (4, 4)
model.add(keras.layers.SimpleRNN(2, input_shape=shape))
diff --git a/res/TensorFlowPythonExamples/examples/round/__init__.py b/res/TensorFlowPythonExamples/examples/round/__init__.py
index 9a00ad558..6cda033e2 100755
--- a/res/TensorFlowPythonExamples/examples/round/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/round/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.round(in_)
diff --git a/res/TensorFlowPythonExamples/examples/rsqrt/__init__.py b/res/TensorFlowPythonExamples/examples/rsqrt/__init__.py
index 90500bd11..dc81e48aa 100755
--- a/res/TensorFlowPythonExamples/examples/rsqrt/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/rsqrt/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.rsqrt(in_)
diff --git a/res/TensorFlowPythonExamples/examples/scatter_nd/__init__.py b/res/TensorFlowPythonExamples/examples/scatter_nd/__init__.py
index e094b5705..0158e3ca6 100644
--- a/res/TensorFlowPythonExamples/examples/scatter_nd/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/scatter_nd/__init__.py
@@ -2,6 +2,8 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
indices = tf.compat.v1.constant([[0], [2]])
updates = tf.compat.v1.constant([[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8,
diff --git a/res/TensorFlowPythonExamples/examples/segment_sum/__init__.py b/res/TensorFlowPythonExamples/examples/segment_sum/__init__.py
index 24d15bb8b..c15746a66 100755
--- a/res/TensorFlowPythonExamples/examples/segment_sum/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/segment_sum/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4, 4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.int32, shape=(4, ), name="Hole")
op_ = tf.compat.v1.math.segment_sum(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/shape/__init__.py b/res/TensorFlowPythonExamples/examples/shape/__init__.py
index 4c13a338f..b719eb9fc 100644
--- a/res/TensorFlowPythonExamples/examples/shape/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/shape/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(None, 2, 3), name="Hole")
op_ = tf.compat.v1.shape(in_)
diff --git a/res/TensorFlowPythonExamples/examples/sigmoid/__init__.py b/res/TensorFlowPythonExamples/examples/sigmoid/__init__.py
index 43328f2cb..1749071f0 100755
--- a/res/TensorFlowPythonExamples/examples/sigmoid/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/sigmoid/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.nn.sigmoid(in_)
diff --git a/res/TensorFlowPythonExamples/examples/sin/__init__.py b/res/TensorFlowPythonExamples/examples/sin/__init__.py
index 0bfdcffed..75ea73b85 100644
--- a/res/TensorFlowPythonExamples/examples/sin/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/sin/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.sin(in_)
diff --git a/res/TensorFlowPythonExamples/examples/slice/__init__.py b/res/TensorFlowPythonExamples/examples/slice/__init__.py
index 45f9044d1..b734dc22b 100644
--- a/res/TensorFlowPythonExamples/examples/slice/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/slice/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(3, 2, 3), name="Hole")
op_ = tf.compat.v1.slice(in_, [1, 0, 0], [1, 1, 3])
diff --git a/res/TensorFlowPythonExamples/examples/softmax/__init__.py b/res/TensorFlowPythonExamples/examples/softmax/__init__.py
index 5b8d1cdfb..3c93e8a2b 100755
--- a/res/TensorFlowPythonExamples/examples/softmax/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/softmax/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.nn.softmax(in_)
diff --git a/res/TensorFlowPythonExamples/examples/space_to_batch/__init__.py b/res/TensorFlowPythonExamples/examples/space_to_batch/__init__.py
index e088012e9..b0e3d85ab 100644
--- a/res/TensorFlowPythonExamples/examples/space_to_batch/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/space_to_batch/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=[1, 2, 2, 1], name="Hole")
pd_ = tf.constant([[0, 0], [0, 0]], name="Hole")
op_ = tf.space_to_batch(in_, pd_, 2)
diff --git a/res/TensorFlowPythonExamples/examples/space_to_batch_nd/__init__.py b/res/TensorFlowPythonExamples/examples/space_to_batch_nd/__init__.py
index 760195063..892796b12 100644
--- a/res/TensorFlowPythonExamples/examples/space_to_batch_nd/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/space_to_batch_nd/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=[1, 2, 2, 1], name="Hole")
bs_ = tf.constant([2, 2], name="Hole")
pd_ = tf.constant([[0, 0], [0, 0]], name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/space_to_depth/__init__.py b/res/TensorFlowPythonExamples/examples/space_to_depth/__init__.py
index e9bc945bb..e146f6aa3 100644
--- a/res/TensorFlowPythonExamples/examples/space_to_depth/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/space_to_depth/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(tf.float32, shape=[1, 2, 2, 1], name="Hole")
op_ = tf.nn.space_to_depth(in_, 2)
diff --git a/res/TensorFlowPythonExamples/examples/sparse_to_dense/__init__.py b/res/TensorFlowPythonExamples/examples/sparse_to_dense/__init__.py
index 5fe0bc4d0..0ce8f0bdd 100644
--- a/res/TensorFlowPythonExamples/examples/sparse_to_dense/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/sparse_to_dense/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.sparse_placeholder(tf.float32, name="Hole")
op_ = tf.compat.v1.sparse_tensor_to_dense(in_)
diff --git a/res/TensorFlowPythonExamples/examples/split/__init__.py b/res/TensorFlowPythonExamples/examples/split/__init__.py
index 4226f30de..11f542751 100644
--- a/res/TensorFlowPythonExamples/examples/split/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/split/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 3), name="Hole")
op_ = tf.compat.v1.split(in_, 2)
diff --git a/res/TensorFlowPythonExamples/examples/split_2/__init__.py b/res/TensorFlowPythonExamples/examples/split_2/__init__.py
index 03777df15..6212c6e81 100644
--- a/res/TensorFlowPythonExamples/examples/split_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/split_2/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 3), name="Hole")
op_ = tf.compat.v1.split(in_, [1, 2, 1])
diff --git a/res/TensorFlowPythonExamples/examples/sqrt/__init__.py b/res/TensorFlowPythonExamples/examples/sqrt/__init__.py
index 4aab5da9c..8e304e80c 100755
--- a/res/TensorFlowPythonExamples/examples/sqrt/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/sqrt/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.sqrt(in_)
diff --git a/res/TensorFlowPythonExamples/examples/square/__init__.py b/res/TensorFlowPythonExamples/examples/square/__init__.py
index 2d03e9b89..f0c3e4410 100644
--- a/res/TensorFlowPythonExamples/examples/square/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/square/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.math.square(in_)
diff --git a/res/TensorFlowPythonExamples/examples/squared_difference/__init__.py b/res/TensorFlowPythonExamples/examples/squared_difference/__init__.py
index baacf5622..6e86f843d 100755
--- a/res/TensorFlowPythonExamples/examples/squared_difference/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/squared_difference/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.squared_difference(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/squeeze_1/__init__.py b/res/TensorFlowPythonExamples/examples/squeeze_1/__init__.py
index d054f01a2..ba2348c1e 100755
--- a/res/TensorFlowPythonExamples/examples/squeeze_1/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/squeeze_1/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 1, 4), name="Hole")
op_ = tf.compat.v1.squeeze(in_)
diff --git a/res/TensorFlowPythonExamples/examples/squeeze_2/__init__.py b/res/TensorFlowPythonExamples/examples/squeeze_2/__init__.py
index 5715bed0e..d6134589a 100755
--- a/res/TensorFlowPythonExamples/examples/squeeze_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/squeeze_2/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 4, 1, 1), name="Hole")
op_ = tf.compat.v1.squeeze(in_, (0, 2))
diff --git a/res/TensorFlowPythonExamples/examples/strided_slice/__init__.py b/res/TensorFlowPythonExamples/examples/strided_slice/__init__.py
index 2d7234df2..a6fa99a75 100644
--- a/res/TensorFlowPythonExamples/examples/strided_slice/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/strided_slice/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(3, 2, 3), name="Hole")
op_ = tf.compat.v1.strided_slice(in_, [1, 0, 0], [2, 1, 3], [1, 1, 1])
diff --git a/res/TensorFlowPythonExamples/examples/subtract/__init__.py b/res/TensorFlowPythonExamples/examples/subtract/__init__.py
index feb11b12e..39cdbc3a2 100755
--- a/res/TensorFlowPythonExamples/examples/subtract/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/subtract/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
lhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
rhs_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.compat.v1.subtract(lhs_, rhs_)
diff --git a/res/TensorFlowPythonExamples/examples/sum/__init__.py b/res/TensorFlowPythonExamples/examples/sum/__init__.py
index 69297d6a0..14e408ca0 100644
--- a/res/TensorFlowPythonExamples/examples/sum/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/sum/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 8, 8, 4), name="Hole")
op_ = tf.compat.v1.reduce_sum(in_, -1, True)
diff --git a/res/TensorFlowPythonExamples/examples/tanh/__init__.py b/res/TensorFlowPythonExamples/examples/tanh/__init__.py
index dd202a78d..ccd37579a 100755
--- a/res/TensorFlowPythonExamples/examples/tanh/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/tanh/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 1), name="Hole")
op_ = tf.compat.v1.tanh(in_)
diff --git a/res/TensorFlowPythonExamples/examples/tile/__init__.py b/res/TensorFlowPythonExamples/examples/tile/__init__.py
index aad4e73dd..f5d4ef8e4 100755
--- a/res/TensorFlowPythonExamples/examples/tile/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/tile/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(2, 3), name="Hole")
multiples_ = tf.compat.v1.constant([1, 2], name="Hole")
op_ = tf.compat.v1.tile(in_, multiples_)
diff --git a/res/TensorFlowPythonExamples/examples/top_k/__init__.py b/res/TensorFlowPythonExamples/examples/top_k/__init__.py
index e7b823400..05c330630 100644
--- a/res/TensorFlowPythonExamples/examples/top_k/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/top_k/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[4], name="Hole")
op_ = tf.compat.v1.math.top_k(in_, k=1)
diff --git a/res/TensorFlowPythonExamples/examples/unidirectional_sequence_LSTM/__init__.py b/res/TensorFlowPythonExamples/examples/unidirectional_sequence_LSTM/__init__.py
index eaeb32ac3..3dde2b9c9 100644
--- a/res/TensorFlowPythonExamples/examples/unidirectional_sequence_LSTM/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/unidirectional_sequence_LSTM/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[28, 28, 3], name="Hole")
op_ = tf.compat.v1.keras.layers.LSTM(1, time_major=False, return_sequences=True)(in_)
diff --git a/res/TensorFlowPythonExamples/examples/unique/__init__.py b/res/TensorFlowPythonExamples/examples/unique/__init__.py
index ad65757d0..00e4f3caf 100644
--- a/res/TensorFlowPythonExamples/examples/unique/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/unique/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(6), name="Hole")
op_ = tf.compat.v1.unique(in_)
diff --git a/res/TensorFlowPythonExamples/examples/unstack/__init__.py b/res/TensorFlowPythonExamples/examples/unstack/__init__.py
index e4ffa2119..2a178569f 100644
--- a/res/TensorFlowPythonExamples/examples/unstack/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/unstack/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[4, 2, 3, 4], name="Hole")
unpack_ = tf.compat.v1.unstack(in_, axis=0)
diff --git a/res/TensorFlowPythonExamples/examples/where/__init__.py b/res/TensorFlowPythonExamples/examples/where/__init__.py
index 69c89c8db..94b747259 100644
--- a/res/TensorFlowPythonExamples/examples/where/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/where/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=[2], name="Hole")
where_ = tf.compat.v1.where(in_)
diff --git a/res/TensorFlowPythonExamples/examples/where_2/__init__.py b/res/TensorFlowPythonExamples/examples/where_2/__init__.py
index 78c50e0fe..19ad0f2f0 100644
--- a/res/TensorFlowPythonExamples/examples/where_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/where_2/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_b_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=[2], name="Hole")
in_x_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[2, 3], name="Hole")
in_y_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[2, 3], name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/where_v2/__init__.py b/res/TensorFlowPythonExamples/examples/where_v2/__init__.py
index de87af72a..b6cc7de9e 100644
--- a/res/TensorFlowPythonExamples/examples/where_v2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/where_v2/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=[2], name="Hole")
where_v2_ = tf.compat.v1.where_v2(in_)
diff --git a/res/TensorFlowPythonExamples/examples/where_v2_2/__init__.py b/res/TensorFlowPythonExamples/examples/where_v2_2/__init__.py
index 4ce17ca11..e3ffe03b7 100644
--- a/res/TensorFlowPythonExamples/examples/where_v2_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/where_v2_2/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_b_ = tf.compat.v1.placeholder(dtype=tf.bool, shape=[3], name="Hole")
in_x_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[2, 1], name="Hole")
in_y_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=[1, 3], name="Hole")
diff --git a/res/TensorFlowPythonExamples/examples/while/__init__.py b/res/TensorFlowPythonExamples/examples/while/__init__.py
index fadaa73e2..15ff4eb65 100644
--- a/res/TensorFlowPythonExamples/examples/while/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/while/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
i = tf.compat.v1.constant(0, name="Hole")
c = lambda i: tf.compat.v1.less(i, 10)
diff --git a/res/TensorFlowPythonExamples/examples/while_2/__init__.py b/res/TensorFlowPythonExamples/examples/while_2/__init__.py
index af1c74582..9e26639bf 100644
--- a/res/TensorFlowPythonExamples/examples/while_2/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/while_2/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
i = tf.constant(0, shape=[1, 0], dtype=tf.int32, name='i')
x = tf.compat.v1.placeholder(shape=[1, 1], dtype=tf.int32, name='Hole')
diff --git a/res/TensorFlowPythonExamples/examples/while_3/__init__.py b/res/TensorFlowPythonExamples/examples/while_3/__init__.py
index 840846e7e..30ce15a1e 100644
--- a/res/TensorFlowPythonExamples/examples/while_3/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/while_3/__init__.py
@@ -1,5 +1,7 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
x = tf.compat.v1.placeholder(shape=[1, None], dtype=tf.int32, name='Hole')
i = tf.compat.v1.placeholder(shape=[1, None], dtype=tf.int32, name='Hole_2')
diff --git a/res/TensorFlowPythonExamples/examples/yuv_to_rgb/__init__.py b/res/TensorFlowPythonExamples/examples/yuv_to_rgb/__init__.py
index 5230bbac6..16414cea2 100755
--- a/res/TensorFlowPythonExamples/examples/yuv_to_rgb/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/yuv_to_rgb/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(1, 16, 16, 3), name="Hole")
op_ = tf.compat.v1.image.yuv_to_rgb(in_)
diff --git a/res/TensorFlowPythonExamples/examples/zeros_like/__init__.py b/res/TensorFlowPythonExamples/examples/zeros_like/__init__.py
index 7daf85e84..d4080ec43 100644
--- a/res/TensorFlowPythonExamples/examples/zeros_like/__init__.py
+++ b/res/TensorFlowPythonExamples/examples/zeros_like/__init__.py
@@ -1,4 +1,6 @@
import tensorflow as tf
+tf.compat.v1.disable_eager_execution()
+
in_ = tf.compat.v1.placeholder(dtype=tf.float32, shape=(4, 4), name="Hole")
op_ = tf.zeros_like(in_)
diff --git a/runtime/contrib/android/api/build.gradle b/runtime/contrib/android/api/build.gradle
index 9a9465072..bc39a09b9 100644
--- a/runtime/contrib/android/api/build.gradle
+++ b/runtime/contrib/android/api/build.gradle
@@ -8,7 +8,7 @@ android {
minSdkVersion 26
targetSdkVersion 29
versionCode 1
- versionName "1.20.0"
+ versionName "1.21.0"
externalNativeBuild {
ndkBuild {
diff --git a/runtime/libs/misc/CMakeLists.txt b/runtime/libs/misc/CMakeLists.txt
index 557d403ec..69d6a9208 100644
--- a/runtime/libs/misc/CMakeLists.txt
+++ b/runtime/libs/misc/CMakeLists.txt
@@ -1,11 +1,22 @@
# Library `nnfw_lib_misc`
-file(GLOB_RECURSE NNFW_UTILITY_SRCS "src/*.cpp")
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
-add_library(nnfw_lib_misc STATIC ${NNFW_UTILITY_SRCS})
+add_library(nnfw_lib_misc STATIC ${SOURCES})
target_include_directories(nnfw_lib_misc PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
set_target_properties(nnfw_lib_misc PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(nnfw_lib_misc PRIVATE nnfw_common)
target_link_libraries(nnfw_lib_misc PRIVATE nnfw_coverage)
-add_executable(nnfw_tensor_index_iterator "examples/tensor_index_iterator.cpp")
-target_link_libraries(nnfw_tensor_index_iterator nnfw_lib_misc)
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+add_executable(nnfw_lib_misc_test ${TESTS})
+target_link_libraries(nnfw_lib_misc_test PRIVATE nnfw_lib_misc)
+target_link_libraries(nnfw_lib_misc_test PRIVATE nnfw_coverage)
+target_link_libraries(nnfw_lib_misc_test PUBLIC gtest gtest_main ${LIB_PTHREAD})
+
+add_test(nnfw_lib_misc_test nnfw_lib_misc_test)
+install(TARGETS nnfw_lib_misc_test DESTINATION unittest_standalone)
diff --git a/runtime/libs/misc/examples/tensor_index_iterator.cpp b/runtime/libs/misc/examples/tensor_index_iterator.cpp
deleted file mode 100644
index 590b433df..000000000
--- a/runtime/libs/misc/examples/tensor_index_iterator.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "misc/tensor/IndexIterator.h"
-
-#include <array>
-
-#include <iostream>
-#include <algorithm>
-
-#include <cassert>
-
-void test_iterate(void)
-{
- const nnfw::misc::tensor::Shape shape{3, 4, 7};
-
- std::array<int, 3 * 4 * 7> array;
-
- array.fill(0);
-
- using nnfw::misc::tensor::Index;
- using nnfw::misc::tensor::iterate;
-
- iterate(shape) << [&](const Index &index) {
- assert(index.rank() == shape.rank());
-
- const uint32_t rank = index.rank();
-
- uint32_t offset = index.at(0);
-
- for (uint32_t axis = 1; axis < rank; ++axis)
- {
- offset *= shape.dim(axis);
- offset += index.at(axis);
- }
-
- array[offset] += 1;
- };
-
- assert(std::all_of(array.begin(), array.end(), [](int num) { return num == 1; }));
-}
-
-int main(int argc, char **argv)
-{
- test_iterate();
-
- nnfw::misc::tensor::Shape shape{3, 4, 3, 4};
-
- std::cout << "Iterate over tensor{3, 4, 3, 4}" << std::endl;
-
- nnfw::misc::tensor::iterate(shape) << [](const nnfw::misc::tensor::Index &index) {
- std::cout << "rank: " << index.rank() << std::endl;
-
- for (uint32_t d = 0; d < index.rank(); ++d)
- {
- std::cout << " offset(" << d << ") = " << index.at(d) << std::endl;
- }
- };
-
- return 0;
-}
diff --git a/runtime/libs/misc/include/misc/EnvConfigSource.h b/runtime/libs/misc/include/misc/EnvConfigSource.h
new file mode 100644
index 000000000..63c8ae9c0
--- /dev/null
+++ b/runtime/libs/misc/include/misc/EnvConfigSource.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_MISC_ENV_CONFIG_SOURCE_H__
+#define __NNFW_MISC_ENV_CONFIG_SOURCE_H__
+
+#include "GeneralConfigSource.h"
+
+#include <unordered_map>
+
+namespace nnfw
+{
+namespace misc
+{
+
+class EnvConfigSource final : public GeneralConfigSource
+{
+public:
+ std::string get(const std::string &key) const override;
+
+private:
+ std::unordered_map<std::string, std::string> _default_attributes;
+};
+
+} // namespace misc
+} // namespace nnfw
+
+#endif // __NNFW_MISC_ENV_CONFIG_SOURCE_H__
diff --git a/runtime/libs/misc/include/misc/GeneralConfigSource.h b/runtime/libs/misc/include/misc/GeneralConfigSource.h
new file mode 100644
index 000000000..a3de66e81
--- /dev/null
+++ b/runtime/libs/misc/include/misc/GeneralConfigSource.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_MISC_GENERAL_CONFIG_SOURCE_H__
+#define __NNFW_MISC_GENERAL_CONFIG_SOURCE_H__
+
+#include "IConfigSource.h"
+
+#include <unordered_map>
+
+namespace nnfw
+{
+namespace misc
+{
+
+class GeneralConfigSource : public IConfigSource
+{
+public:
+ GeneralConfigSource() = default;
+
+ std::string get(const std::string &key) const override;
+ void set(const std::string &key, const std::string &val);
+
+private:
+ std::unordered_map<std::string, std::string> _map;
+};
+
+} // namespace misc
+} // namespace nnfw
+
+#endif // __NNFW_MISC_GENERAL_CONFIG_SOURCE_H__
diff --git a/runtime/libs/misc/include/misc/IConfigSource.h b/runtime/libs/misc/include/misc/IConfigSource.h
new file mode 100644
index 000000000..fe2c48ecf
--- /dev/null
+++ b/runtime/libs/misc/include/misc/IConfigSource.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_MISC_I_CONFIG_SOURCE_H__
+#define __NNFW_MISC_I_CONFIG_SOURCE_H__
+
+#include <string>
+
+namespace nnfw
+{
+namespace misc
+{
+
+struct IConfigSource
+{
+ /**
+ * @brief Destroy the IConfigSource object
+ */
+ virtual ~IConfigSource() = default;
+
+ /**
+ * @brief get the value for the matching key
+ *
+ * @param key string key to search
+ * @return string value associated with the key
+ */
+ virtual std::string get(const std::string &key) const = 0;
+};
+
+} // namespace misc
+} // namespace nnfw
+
+#endif // __NNFW_MISC_I_CONFIG_SOURCE_H__
diff --git a/runtime/libs/misc/include/misc/string_helpers.h b/runtime/libs/misc/include/misc/string_helpers.h
index 46fecca71..c9d72034f 100644
--- a/runtime/libs/misc/include/misc/string_helpers.h
+++ b/runtime/libs/misc/include/misc/string_helpers.h
@@ -50,7 +50,7 @@ inline std::vector<std::string> split(const std::string &s, char delim)
std::vector<std::string> elems;
while (std::getline(ss, item, delim))
{
- elems.push_back(std::move(item));
+ elems.push_back(item);
}
return elems;
}
diff --git a/runtime/libs/misc/src/EnvConfigSource.cpp b/runtime/libs/misc/src/EnvConfigSource.cpp
new file mode 100644
index 000000000..3abc9d196
--- /dev/null
+++ b/runtime/libs/misc/src/EnvConfigSource.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "misc/EnvConfigSource.h"
+
+#include <cstdlib>
+
+namespace nnfw
+{
+namespace misc
+{
+
+std::string EnvConfigSource::get(const std::string &key) const
+{
+ const char *value = std::getenv(key.c_str());
+ if (value != nullptr)
+ {
+ return value;
+ }
+ else
+ {
+ return GeneralConfigSource::get(key);
+ }
+}
+
+} // namespace misc
+} // namespace nnfw
diff --git a/runtime/libs/misc/src/GeneralConfigSource.cpp b/runtime/libs/misc/src/GeneralConfigSource.cpp
new file mode 100644
index 000000000..298c1663e
--- /dev/null
+++ b/runtime/libs/misc/src/GeneralConfigSource.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "misc/GeneralConfigSource.h"
+
+namespace nnfw
+{
+namespace misc
+{
+
+std::string GeneralConfigSource::get(const std::string &key) const
+{
+ auto itr = _map.find(key);
+ if (itr == _map.end())
+ {
+ return "";
+ }
+ else
+ {
+ return itr->second;
+ }
+}
+
+void GeneralConfigSource::set(const std::string &key, const std::string &val) { _map[key] = val; }
+
+} // namespace misc
+} // namespace nnfw
diff --git a/runtime/libs/misc/src/string_helpers.test.cpp b/runtime/libs/misc/src/string_helpers.test.cpp
new file mode 100644
index 000000000..1111425d0
--- /dev/null
+++ b/runtime/libs/misc/src/string_helpers.test.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "misc/string_helpers.h"
+
+#include <gtest/gtest.h>
+
+TEST(StringHelpersTest, split)
+{
+ const std::string example = "abc;def;ghi";
+
+ auto str_vector = nnfw::misc::split(example, ';');
+
+ ASSERT_EQ(str_vector.size(), 3);
+ EXPECT_STREQ(str_vector[0].c_str(), "abc");
+ EXPECT_STREQ(str_vector[1].c_str(), "def");
+ EXPECT_STREQ(str_vector[2].c_str(), "ghi");
+}
+
+TEST(StringHelpersTest, neg_split_empty)
+{
+ const std::string example = "";
+
+ auto str_vector = nnfw::misc::split(example, ';');
+
+ ASSERT_EQ(str_vector.size(), 0);
+}
+
+TEST(StringHelpersTest, neg_nonsplit)
+{
+ const std::string example = "abc;def;ghi";
+
+ auto str_vector = nnfw::misc::split(example, ':');
+
+ ASSERT_EQ(str_vector.size(), 1);
+ EXPECT_STREQ(str_vector[0].c_str(), example.c_str());
+}
+
+TEST(StringHelpersTest, append)
+{
+ auto append_str = nnfw::misc::str("abc", "-", 1);
+
+ EXPECT_STREQ(append_str.c_str(), "abc-1");
+}
+
+TEST(StringHelpersTest, neg_append_nullstr)
+{
+ const char *null_str = nullptr;
+ auto append_str = nnfw::misc::str(null_str, null_str);
+
+ ASSERT_EQ(append_str.size(), 0);
+}
+
+TEST(StringHelpersTest, join)
+{
+ const std::vector<std::string> example = {"abc", "def", "ghi"};
+
+ auto join_str = nnfw::misc::join(example.begin(), example.end(), ";");
+ EXPECT_STREQ(join_str.c_str(), "abc;def;ghi");
+}
+
+TEST(StringHelpersTest, neg_join_empty)
+{
+ const std::vector<std::string> example = {};
+
+ auto join_str = nnfw::misc::join(example.begin(), example.end(), ";");
+ ASSERT_EQ(join_str.size(), 0);
+}
diff --git a/runtime/libs/misc/src/tensor/IndexEnumerator.test.cpp b/runtime/libs/misc/src/tensor/IndexEnumerator.test.cpp
new file mode 100644
index 000000000..4cff6067f
--- /dev/null
+++ b/runtime/libs/misc/src/tensor/IndexEnumerator.test.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "misc/tensor/IndexEnumerator.h"
+
+#include <vector>
+#include <algorithm>
+
+#include <gtest/gtest.h>
+
+using nnfw::misc::tensor::Shape;
+using nnfw::misc::tensor::Index;
+using nnfw::misc::tensor::IndexEnumerator;
+
+TEST(MiscIndexEnumeratorTest, iterate_full_range)
+{
+ const uint32_t H = 3;
+ const uint32_t W = 4;
+
+ const Shape shape{H, W};
+
+ std::vector<uint32_t> count;
+
+ count.resize(H * W, 0);
+
+ for (IndexEnumerator e{shape}; e.valid(); e.advance())
+ {
+ const auto &ind = e.curr();
+
+ ASSERT_EQ(2, ind.rank());
+ count.at(ind.at(0) * W + ind.at(1)) += 1;
+ }
+
+ ASSERT_TRUE(std::all_of(count.begin(), count.end(), [](uint32_t n) { return n == 1; }));
+}
+
+TEST(MiscIndexEnumeratorTest, neg_zero_rank_shape)
+{
+ // Test abnormal case of empty shape
+ // It is expected not to throw any exception, do nothing
+ const Shape shape{};
+ IndexEnumerator e{shape};
+ ASSERT_NO_THROW(e.valid());
+ ASSERT_NO_THROW(e.advance());
+ SUCCEED();
+}
diff --git a/runtime/libs/misc/src/tensor/IndexIterator.test.cpp b/runtime/libs/misc/src/tensor/IndexIterator.test.cpp
new file mode 100644
index 000000000..875786bdd
--- /dev/null
+++ b/runtime/libs/misc/src/tensor/IndexIterator.test.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "misc/tensor/IndexIterator.h"
+
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <array>
+
+using namespace nnfw::misc::tensor;
+
+TEST(MiscIndexIteratorTest, iterate)
+{
+ const Shape shape{3, 4, 7};
+
+ std::array<int, 3 * 4 * 7> array;
+
+ array.fill(0);
+
+ iterate(shape) << [&](const Index &index) {
+ assert(index.rank() == shape.rank());
+
+ const uint32_t rank = index.rank();
+
+ uint32_t offset = index.at(0);
+
+ for (uint32_t axis = 1; axis < rank; ++axis)
+ {
+ offset *= shape.dim(axis);
+ offset += index.at(axis);
+ }
+
+ array[offset] += 1;
+ };
+
+ ASSERT_TRUE(std::all_of(array.begin(), array.end(), [](int num) { return num == 1; }));
+}
+
+TEST(MiscIndexIteratorTest, neg_zero_rank_shape)
+{
+ // Test abnormal case of empty shape
+ // It is expected not to throw any exception, do nothing
+ const Shape shape{};
+
+ ASSERT_NO_THROW(iterate(shape) << ([](const Index &index) {}));
+ SUCCEED();
+}
diff --git a/runtime/libs/ndarray/CMakeLists.txt b/runtime/libs/ndarray/CMakeLists.txt
index f88f13186..cf8c5208a 100644
--- a/runtime/libs/ndarray/CMakeLists.txt
+++ b/runtime/libs/ndarray/CMakeLists.txt
@@ -3,8 +3,6 @@ add_library(ndarray STATIC src/Array.cpp src/ContiguousSpan.cpp)
set_target_properties(ndarray PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(ndarray PUBLIC include)
-#can't make this private because of c++ templates
-target_include_directories(ndarray PUBLIC src)
option(NDARRAY_INLINE_TEMPLATES "Set to ON to disable extern declarations for common types")
@@ -19,5 +17,12 @@ if(NOT ENABLE_TEST)
return()
endif(NOT ENABLE_TEST)
-add_subdirectory(test)
+add_executable(ndarray_test src/Array.test.cpp src/ContiguousSpan.test.cpp)
+target_link_libraries(ndarray_test PRIVATE ndarray)
+target_link_libraries(ndarray_test PRIVATE nnfw_coverage)
+target_link_libraries(ndarray_test PUBLIC gtest gtest_main ${LIB_PTHREAD})
+
+add_test(ndarray_test ndarray_test)
+install(TARGETS ndarray_test DESTINATION unittest_standalone)
+
add_subdirectory(example)
diff --git a/runtime/libs/ndarray/include/ndarray/Array.h b/runtime/libs/ndarray/include/ndarray/Array.h
index 09e791763..568fe1c77 100644
--- a/runtime/libs/ndarray/include/ndarray/Array.h
+++ b/runtime/libs/ndarray/include/ndarray/Array.h
@@ -22,37 +22,21 @@
#include "ContiguousSpan.h"
#include "Shape.h"
-#if __cplusplus < 201402L
-#include "detail/cxx14.h" //integer_sequence and make_index_dequence definitions
-#else
-#include <utility>
-#endif
-
#include <algorithm>
-#include <cassert>
-#include <type_traits>
#include <array>
-#include <tuple>
+#include <cassert>
#include <cstddef>
+#include <tuple>
+#include <type_traits>
+#include <utility>
namespace ndarray
{
-// there is no index_sequence before c++14
-#if __cplusplus < 201402L
-
-template <size_t... Nums> using index_sequence = cxx14::index_sequence<Nums...>;
-
-template <size_t Num> using make_index_sequence = cxx14::make_index_sequence<Num>;
-
-#else
-
template <size_t... Nums> using index_sequence = std::index_sequence<Nums...>;
template <size_t _Num> using make_index_sequence = std::make_index_sequence<_Num>;
-#endif //__cplusplus < 201402L
-
struct Strides
{
explicit Strides(Shape s) : _strides{} { fillStrides(s); }
diff --git a/runtime/libs/ndarray/src/Array.test.cpp b/runtime/libs/ndarray/src/Array.test.cpp
new file mode 100644
index 000000000..15e67600d
--- /dev/null
+++ b/runtime/libs/ndarray/src/Array.test.cpp
@@ -0,0 +1,452 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ndarray/Array.h"
+
+#include <gtest/gtest.h>
+
+using namespace ndarray;
+
+TEST(NDArrayArrayTests, basic_data_test)
+{
+ float raw_data[] = {1, 2, 3, 4};
+ int32_t raw_data_int[] = {1, 2, 3, 4};
+ uint32_t raw_data_uint[] = {1, 2, 3, 4};
+ int8_t raw_data_int8[] = {1, 2, 3, 4};
+
+ Array<float> data22{raw_data, {2, 2}};
+ Array<int32_t> data22_int{raw_data_int, {2, 2}};
+ Array<uint32_t> data22_uint{raw_data_uint, {2, 2}};
+ Array<int8_t> data22_int8{raw_data_int8, {2, 2}};
+
+ ASSERT_FLOAT_EQ(data22.at(0, 0), 1);
+ ASSERT_FLOAT_EQ(data22.at(0, 1), 2);
+ ASSERT_FLOAT_EQ(data22.at(1, 0), 3);
+ ASSERT_FLOAT_EQ(data22.at(1, 1), 4);
+ ASSERT_EQ(data22.shape().rank(), 2);
+ ASSERT_EQ(data22.shape().dim(0), 2);
+ ASSERT_EQ(data22.shape().dim(1), 2);
+
+ Array<float> data14{raw_data, {1, 4}};
+ ASSERT_FLOAT_EQ(data14.at(0, 0), 1);
+ ASSERT_FLOAT_EQ(data14.at(0, 1), 2);
+ ASSERT_FLOAT_EQ(data14.at(0, 2), 3);
+ ASSERT_FLOAT_EQ(data14.at(0, 3), 4);
+ ASSERT_EQ(data14.shape().rank(), 2);
+ ASSERT_EQ(data14.shape().dim(0), 1);
+ ASSERT_EQ(data14.shape().dim(1), 4);
+
+ // <float, false>
+ {
+ ContiguousSpan<float> cs = data22.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_FLOAT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<float> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_FLOAT_EQ(cs2.at(3), 4);
+
+ float sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<float> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<float>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_FLOAT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_FLOAT_EQ(cs4->at(3), 4);
+ }
+
+ // <float, true>
+ {
+ ContiguousSpan<float, true> cs = data22.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_FLOAT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<float, true> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_FLOAT_EQ(cs2.at(3), 4);
+
+ float sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_FLOAT_EQ(sum, 10);
+
+ std::vector<float> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<float, true>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_FLOAT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_FLOAT_EQ(cs4->at(3), 4);
+ }
+
+ // <int32_t, false>
+ {
+ ContiguousSpan<int32_t> cs = data22_int.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<int32_t> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_EQ(cs2.at(3), 4);
+
+ int32_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<int32_t> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<int32_t>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_EQ(cs4->at(3), 4);
+ }
+
+ // <int32_t, true>
+ {
+ ContiguousSpan<int32_t, true> cs = data22_int.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<int32_t, true> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_EQ(cs2.at(3), 4);
+
+ int32_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<int32_t> array_data{1, 2, 3, 4};
+ auto cs3 =
+ std::make_unique<ContiguousSpan<int32_t, true>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_EQ(cs4->at(3), 4);
+ }
+
+ // <uint32_t, false>
+ {
+ ContiguousSpan<uint32_t> cs = data22_uint.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<uint32_t> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_EQ(cs2.at(3), 4);
+
+ uint32_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<uint32_t> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<uint32_t>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ }
+
+ // <uint32_t, true>
+ {
+ ContiguousSpan<uint32_t, true> cs = data22_uint.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<uint32_t, true> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_EQ(cs2.at(3), 4);
+
+ uint32_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<uint32_t> array_data{1, 2, 3, 4};
+ auto cs3 =
+ std::make_unique<ContiguousSpan<uint32_t, true>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_EQ(cs4->at(3), 4);
+ }
+
+ // <int8_t, false>
+ {
+ ContiguousSpan<int8_t> cs = data22_int8.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_FLOAT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<int8_t> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_FLOAT_EQ(cs2.at(3), 4);
+
+ int8_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<int8_t> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<int8_t>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_EQ(cs4->at(3), 4);
+
+ auto cs5 = ContiguousSpan<int8_t>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs5.size(), 4);
+ ASSERT_EQ(cs5.at(3), 4);
+ }
+
+ // <int8_t, true>
+ {
+ ContiguousSpan<int8_t, true> cs = data22_int8.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_FLOAT_EQ(cs.at(3), 4);
+
+ ContiguousSpan<int8_t, true> cs2 = std::move(cs);
+ ASSERT_EQ(cs2.size(), 4);
+ ASSERT_FLOAT_EQ(cs2.at(3), 4);
+
+ int8_t sum = 0;
+ for (auto it = cs2.begin(); it < cs2.end(); it++)
+ {
+ sum += *it;
+ }
+ ASSERT_EQ(sum, 10);
+
+ std::vector<int8_t> array_data{1, 2, 3, 4};
+ auto cs3 = std::make_unique<ContiguousSpan<int8_t, true>>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs3->size(), 4);
+ ASSERT_EQ(cs3->at(3), 4);
+
+ auto cs4 = std::move(cs3);
+ ASSERT_EQ(cs3, nullptr);
+ ASSERT_EQ(cs4->size(), 4);
+ ASSERT_EQ(cs4->at(3), 4);
+
+ auto cs5 = ContiguousSpan<int8_t, true>(array_data.begin(), array_data.end());
+ ASSERT_EQ(cs5.size(), 4);
+ ASSERT_EQ(cs5.at(3), 4);
+ }
+
+ Array<float> lv = std::move(data14);
+ ASSERT_FLOAT_EQ(lv.at(0, 0), 1);
+ ASSERT_FLOAT_EQ(lv.at(0, 1), 2);
+ ASSERT_FLOAT_EQ(lv.at(0, 2), 3);
+ ASSERT_FLOAT_EQ(lv.at(0, 3), 4);
+}
+
+TEST(NDArrayArrayTests, slice_write_test)
+{
+ // float
+ {
+ float raw_data[4] = {0};
+
+ Array<float> data22{raw_data, {2, 2}};
+
+ data22.slice(1) = {1, 2};
+
+ ASSERT_FLOAT_EQ(data22.at(0, 0), 0);
+ ASSERT_FLOAT_EQ(data22.at(0, 1), 0);
+ ASSERT_FLOAT_EQ(data22.at(1, 0), 1);
+ ASSERT_FLOAT_EQ(data22.at(1, 1), 2);
+ }
+
+ // int32_t
+ {
+ int32_t raw_data[4] = {0};
+ Array<int32_t> data22{raw_data, {2, 2}};
+
+ data22.slice(1) = {1, 2};
+
+ ASSERT_EQ(data22.at(0, 0), 0);
+ ASSERT_EQ(data22.at(0, 1), 0);
+ ASSERT_EQ(data22.at(1, 0), 1);
+ ASSERT_EQ(data22.at(1, 1), 2);
+ }
+
+ // uint32_t
+ {
+ uint32_t raw_data[4] = {0};
+ Array<uint32_t> data22{raw_data, {2, 2}};
+
+ data22.slice(1) = {1, 2};
+
+ ASSERT_EQ(data22.at(0, 0), 0);
+ ASSERT_EQ(data22.at(0, 1), 0);
+ ASSERT_EQ(data22.at(1, 0), 1);
+ ASSERT_EQ(data22.at(1, 1), 2);
+ }
+
+ // int8_t
+ {
+ int8_t raw_data[4] = {0};
+ Array<int8_t> data22{raw_data, {2, 2}};
+
+ data22.slice(1) = {1, 2};
+
+ ASSERT_EQ(data22.at(0, 0), 0);
+ ASSERT_EQ(data22.at(0, 1), 0);
+ ASSERT_EQ(data22.at(1, 0), 1);
+ ASSERT_EQ(data22.at(1, 1), 2);
+ }
+}
+
+TEST(NDArrayArrayTests, slice_read_test)
+{
+ // float
+ {
+ float raw_data[4] = {1, 2, 3, 4};
+
+ Array<float> data22{raw_data, {2, 2}};
+
+ auto slice = data22.slice(1);
+
+ ASSERT_FLOAT_EQ(slice[0], 3);
+ ASSERT_FLOAT_EQ(slice[1], 4);
+ }
+
+ // int32_t
+ {
+ int32_t raw_data[4] = {1, 2, 3, 4};
+
+ Array<int32_t> data22{raw_data, {2, 2}};
+
+ auto slice = data22.slice(1);
+
+ ASSERT_EQ(slice[0], 3);
+ ASSERT_EQ(slice[1], 4);
+ }
+
+ // uint32_t
+ {
+ uint32_t raw_data[4] = {1, 2, 3, 4};
+
+ Array<uint32_t> data22{raw_data, {2, 2}};
+
+ auto slice = data22.slice(1);
+
+ ASSERT_EQ(slice[0], 3);
+ ASSERT_EQ(slice[1], 4);
+ }
+
+ // int8_t
+ {
+ int8_t raw_data[4] = {1, 2, 3, 4};
+
+ Array<int8_t> data22{raw_data, {2, 2}};
+
+ auto slice = data22.slice(1);
+
+ ASSERT_EQ(slice[0], 3);
+ ASSERT_EQ(slice[1], 4);
+ }
+}
+
+TEST(NDArrayArrayTests, multidim_test)
+{
+ // float
+ {
+ float raw_data[5] = {0, 1, 2, 3, 4};
+
+ Array<float> data22{raw_data, {1, 1, 1, 1, 5}};
+
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 0), 0);
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 1), 1);
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 2), 2);
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 3), 3);
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 4), 4);
+ }
+
+ // int32_t
+ {
+ int32_t raw_data[5] = {0, 1, 2, 3, 4};
+
+ Array<int32_t> data22{raw_data, {1, 1, 1, 1, 5}};
+
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 0), 0);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 1), 1);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 2), 2);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 3), 3);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 4), 4);
+ }
+
+ // uint32_t
+ {
+ uint32_t raw_data[5] = {0, 1, 2, 3, 4};
+
+ Array<uint32_t> data22{raw_data, {1, 1, 1, 1, 5}};
+
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 0), 0);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 1), 1);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 2), 2);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 3), 3);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 4), 4);
+ }
+
+ // int8_t
+ {
+ int8_t raw_data[5] = {0, 1, 2, 3, 4};
+
+ Array<int8_t> data22{raw_data, {1, 1, 1, 1, 5}};
+
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 0), 0);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 1), 1);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 2), 2);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 3), 3);
+ ASSERT_EQ(data22.at(0, 0, 0, 0, 4), 4);
+ }
+}
diff --git a/runtime/libs/ndarray/src/ContiguousSpan.test.cpp b/runtime/libs/ndarray/src/ContiguousSpan.test.cpp
new file mode 100644
index 000000000..dd1108697
--- /dev/null
+++ b/runtime/libs/ndarray/src/ContiguousSpan.test.cpp
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ndarray/ContiguousSpan.h"
+
+#include <gtest/gtest.h>
+
+using namespace ndarray;
+
+TEST(NDArrayContiguousSpanTests, slice_assign_test)
+{
+ // float
+ {
+ std::vector<float> v1{1, 2, 3, 4, 5};
+ std::vector<float> v2(5);
+
+ ContiguousSpan<float> span1(v1.begin(), v1.end());
+ ContiguousSpan<float> span2(v2.begin(), v2.end());
+
+ span2.assign(span1);
+
+ ASSERT_EQ(v1, v2);
+ ASSERT_EQ(span1.size(), 5);
+ ASSERT_EQ(span2.size(), 5);
+
+ ASSERT_EQ(span2.at(2), 3);
+ ASSERT_EQ(span2.at(4), 5);
+
+ ASSERT_EQ(*(span1.data() + 2), *(span1.data() + 2));
+
+ ContiguousSpan<float> span3(span2.offset(1));
+ ASSERT_EQ(span3.size(), 4);
+ ASSERT_EQ(span3.at(0), 2);
+ ASSERT_EQ(span3.at(1), 3);
+ ASSERT_EQ(span3[2], 4);
+ ASSERT_EQ(span3[3], 5);
+
+ // const
+ ContiguousSpan<float, true> span4(v1.begin(), v1.end());
+ ASSERT_EQ(span4.size(), 5);
+ ASSERT_EQ(span4.at(0), 1);
+ ASSERT_EQ(span4.at(1), 2);
+ ASSERT_EQ(span4.at(2), 3);
+ ASSERT_EQ(span4[3], 4);
+ ASSERT_EQ(span4[4], 5);
+
+ ContiguousSpan<float, true> span5(span4.offset(1));
+ ASSERT_EQ(span5.size(), 4);
+ ASSERT_EQ(span5.at(0), 2);
+ ASSERT_EQ(span5.at(1), 3);
+ ASSERT_EQ(span5[2], 4);
+ ASSERT_EQ(span5[3], 5);
+
+ ASSERT_EQ(*(span5.data() + 2), *(span5.data() + 2));
+ }
+
+ // int32_t
+ {
+ std::vector<int32_t> v1{1, 2, 3, 4, 5};
+ std::vector<int32_t> v2(5);
+
+ ContiguousSpan<int32_t> span1(v1.begin(), v1.end());
+ ContiguousSpan<int32_t> span2(v2.begin(), v2.end());
+
+ span2.assign(span1);
+
+ ASSERT_EQ(v1, v2);
+ ASSERT_EQ(span1.size(), 5);
+ ASSERT_EQ(span2.size(), 5);
+
+ ASSERT_EQ(span2.at(2), 3);
+ ASSERT_EQ(span2.at(4), 5);
+
+ ASSERT_EQ(*(span1.data() + 2), *(span1.data() + 2));
+
+ ContiguousSpan<int32_t> span3(span2.offset(1));
+ ASSERT_EQ(span3.size(), 4);
+ ASSERT_EQ(span3.at(0), 2);
+ ASSERT_EQ(span3.at(1), 3);
+ ASSERT_EQ(span3[2], 4);
+ ASSERT_EQ(span3[3], 5);
+
+ // const
+ ContiguousSpan<int32_t, true> span4(v1.begin(), v1.end());
+ ASSERT_EQ(span4.size(), 5);
+ ASSERT_EQ(span4.at(0), 1);
+ ASSERT_EQ(span4.at(1), 2);
+ ASSERT_EQ(span4.at(2), 3);
+ ASSERT_EQ(span4[3], 4);
+ ASSERT_EQ(span4[4], 5);
+
+ ContiguousSpan<int32_t, true> span5(span4.offset(1));
+ ASSERT_EQ(span5.size(), 4);
+ ASSERT_EQ(span5.at(0), 2);
+ ASSERT_EQ(span5.at(1), 3);
+ ASSERT_EQ(span5[2], 4);
+ ASSERT_EQ(span5[3], 5);
+ }
+
+ // uint32_t
+ {
+ std::vector<uint32_t> v1{1, 2, 3, 4, 5};
+ std::vector<uint32_t> v2(5);
+
+ ContiguousSpan<uint32_t> span1(v1.begin(), v1.end());
+ ContiguousSpan<uint32_t> span2(v2.begin(), v2.end());
+
+ span2.assign(span1);
+
+ ASSERT_EQ(v1, v2);
+ ASSERT_EQ(span1.size(), 5);
+ ASSERT_EQ(span2.size(), 5);
+
+ ASSERT_EQ(span2.at(2), 3);
+ ASSERT_EQ(span2.at(4), 5);
+
+ ASSERT_EQ(*(span1.data() + 2), *(span1.data() + 2));
+
+ ContiguousSpan<uint32_t> span3(span2.offset(1));
+ ASSERT_EQ(span3.size(), 4);
+ ASSERT_EQ(span3.at(0), 2);
+ ASSERT_EQ(span3.at(1), 3);
+ ASSERT_EQ(span3[2], 4);
+ ASSERT_EQ(span3[3], 5);
+
+ // const
+ ContiguousSpan<uint32_t, true> span4(v1.begin(), v1.end());
+ ASSERT_EQ(span4.size(), 5);
+ ASSERT_EQ(span4.at(0), 1);
+ ASSERT_EQ(span4.at(1), 2);
+ ASSERT_EQ(span4.at(2), 3);
+ ASSERT_EQ(span4[3], 4);
+ ASSERT_EQ(span4[4], 5);
+
+ ContiguousSpan<uint32_t, true> span5(span4.offset(1));
+ ASSERT_EQ(span5.size(), 4);
+ ASSERT_EQ(span5.at(0), 2);
+ ASSERT_EQ(span5.at(1), 3);
+ ASSERT_EQ(span5[2], 4);
+ ASSERT_EQ(span5[3], 5);
+ }
+
+ // int8_t
+ {
+ std::vector<int8_t> v1{1, 2, 3, 4, 5};
+ std::vector<int8_t> v2(5);
+
+ ContiguousSpan<int8_t> span1(v1.begin(), v1.end());
+ ContiguousSpan<int8_t> span2(v2.begin(), v2.end());
+
+ span2.assign(span1);
+
+ ASSERT_EQ(v1, v2);
+ ASSERT_EQ(span1.size(), 5);
+ ASSERT_EQ(span2.size(), 5);
+
+ ASSERT_EQ(span2.at(2), 3);
+ ASSERT_EQ(span2.at(4), 5);
+
+ ASSERT_EQ(*(span1.data() + 2), *(span1.data() + 2));
+
+ ContiguousSpan<int8_t> span3(span2.offset(1));
+ ASSERT_EQ(span3.size(), 4);
+ ASSERT_EQ(span3.at(0), 2);
+ ASSERT_EQ(span3.at(1), 3);
+ ASSERT_EQ(span3[2], 4);
+ ASSERT_EQ(span3[3], 5);
+
+ // const
+ ContiguousSpan<int8_t, true> span4(v1.begin(), v1.end());
+ ASSERT_EQ(span4.size(), 5);
+ ASSERT_EQ(span4.at(0), 1);
+ ASSERT_EQ(span4.at(1), 2);
+ ASSERT_EQ(span4.at(2), 3);
+ ASSERT_EQ(span4[3], 4);
+ ASSERT_EQ(span4[4], 5);
+
+ ContiguousSpan<int8_t, true> span5(span4.offset(1));
+ ASSERT_EQ(span5.size(), 4);
+ ASSERT_EQ(span5.at(0), 2);
+ ASSERT_EQ(span5.at(1), 3);
+ ASSERT_EQ(span5[2], 4);
+ ASSERT_EQ(span5[3], 5);
+ }
+}
diff --git a/runtime/libs/ndarray/src/detail/cxx14.h b/runtime/libs/ndarray/src/detail/cxx14.h
deleted file mode 100644
index 8b78fb985..000000000
--- a/runtime/libs/ndarray/src/detail/cxx14.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _NDARRAY_CXX14_H_
-#define _NDARRAY_CXX14_H_
-
-namespace ndarray
-{
-
-namespace cxx14
-{
-
-template <size_t... Nums> struct index_sequence
-{
- using value_type = size_t;
-
- static constexpr std::size_t size() noexcept { return sizeof...(Nums); }
-};
-
-namespace detail
-{
-
-template <size_t v, typename Seq> struct _append;
-
-template <size_t v, size_t... Nums> struct _append<v, index_sequence<Nums...>>
-{
- using result = index_sequence<Nums..., v>;
-};
-
-template <size_t Len> struct make_index_sequence
-{
- using result =
- typename detail::_append<Len - 1, typename make_index_sequence<Len - 1>::result>::result;
-};
-
-template <> struct make_index_sequence<1>
-{
- using result = index_sequence<0>;
-};
-
-template <> struct make_index_sequence<0>
-{
- using result = index_sequence<>;
-};
-
-} // namespace detail
-
-template <size_t Num> using make_index_sequence = typename detail::make_index_sequence<Num>::result;
-
-} // namespace cxx14
-
-} // namespace ndarray
-
-#endif //_NDARRAY_CXX14_H_
diff --git a/runtime/libs/ndarray/test/CMakeLists.txt b/runtime/libs/ndarray/test/CMakeLists.txt
deleted file mode 100644
index be1ed6510..000000000
--- a/runtime/libs/ndarray/test/CMakeLists.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-if(NOT TARGET ndarray)
- return()
-endif()
-
-add_executable(ndarray_test ndarray_test.cpp)
-
-target_link_libraries(ndarray_test PRIVATE ndarray)
-
-nnfw_find_package(GTest)
-if(NOT GTest_FOUND)
- message(STATUS "GTest not avaialble. Skipping NDArray test build")
- return()
-endif(NOT GTest_FOUND)
-
-target_link_libraries(ndarray_test PUBLIC gtest gtest_main ${LIB_PTHREAD})
-
-add_test(ndarray_test ndarray_test)
-install(TARGETS ndarray_test DESTINATION unittest_standalone)
diff --git a/runtime/libs/ndarray/test/ndarray_test.cpp b/runtime/libs/ndarray/test/ndarray_test.cpp
deleted file mode 100644
index 4b5ad5765..000000000
--- a/runtime/libs/ndarray/test/ndarray_test.cpp
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "gtest/gtest.h"
-
-#include "ndarray/Array.h"
-
-using namespace ndarray;
-
-TEST(NDArray_tests, basic_data_test)
-{
-
- float raw_data[] = {1, 2, 3, 4};
-
- Array<float> data22{raw_data, {2, 2}};
-
- ASSERT_FLOAT_EQ(data22.at(0, 0), 1);
- ASSERT_FLOAT_EQ(data22.at(0, 1), 2);
- ASSERT_FLOAT_EQ(data22.at(1, 0), 3);
- ASSERT_FLOAT_EQ(data22.at(1, 1), 4);
- ASSERT_EQ(data22.shape().rank(), 2);
- ASSERT_EQ(data22.shape().dim(0), 2);
- ASSERT_EQ(data22.shape().dim(1), 2);
-
- Array<float> data14{raw_data, {1, 4}};
- ASSERT_FLOAT_EQ(data14.at(0, 0), 1);
- ASSERT_FLOAT_EQ(data14.at(0, 1), 2);
- ASSERT_FLOAT_EQ(data14.at(0, 2), 3);
- ASSERT_FLOAT_EQ(data14.at(0, 3), 4);
- ASSERT_EQ(data14.shape().rank(), 2);
- ASSERT_EQ(data14.shape().dim(0), 1);
- ASSERT_EQ(data14.shape().dim(1), 4);
-
- ContiguousSpan<float> cs = data22.flat();
- ASSERT_EQ(cs.size(), 4);
- ASSERT_FLOAT_EQ(cs.at(3), 4);
-
- Array<float> lv = std::move(data14);
- ASSERT_FLOAT_EQ(lv.at(0, 0), 1);
- ASSERT_FLOAT_EQ(lv.at(0, 1), 2);
- ASSERT_FLOAT_EQ(lv.at(0, 2), 3);
- ASSERT_FLOAT_EQ(lv.at(0, 3), 4);
-}
-
-TEST(NDArray_tests, slice_write_test)
-{
- float raw_data[4] = {0};
-
- Array<float> data22{raw_data, {2, 2}};
-
- data22.slice(1) = {1, 2};
-
- ASSERT_FLOAT_EQ(data22.at(0, 0), 0);
- ASSERT_FLOAT_EQ(data22.at(0, 1), 0);
- ASSERT_FLOAT_EQ(data22.at(1, 0), 1);
- ASSERT_FLOAT_EQ(data22.at(1, 1), 2);
-}
-
-TEST(NDArray_tests, slice_read_test)
-{
- float raw_data[4] = {1, 2, 3, 4};
-
- Array<float> data22{raw_data, {2, 2}};
-
- auto slice = data22.slice(1);
-
- ASSERT_FLOAT_EQ(slice[0], 3);
- ASSERT_FLOAT_EQ(slice[1], 4);
-}
-
-TEST(NDArray_tests, multidim_test)
-{
- float raw_data[5] = {0, 1, 2, 3, 4};
-
- Array<float> data22{raw_data, {1, 1, 1, 1, 5}};
-
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 0), 0);
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 1), 1);
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 2), 2);
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 3), 3);
- ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 4), 4);
-}
-
-TEST(NDArray_tests, slice_assign_test)
-{
- std::vector<float> v1{1, 2, 3, 4, 5};
- std::vector<float> v2(5);
-
- ContiguousSpan<float> span1(v1.begin(), v1.end());
- ContiguousSpan<float> span2(v2.begin(), v2.end());
-
- span2.assign(span1);
-
- ASSERT_EQ(v1, v2);
- ASSERT_EQ(span1.size(), 5);
- ASSERT_EQ(span2.size(), 5);
-
- ASSERT_EQ(span2.at(2), 3);
- ASSERT_EQ(span2.at(4), 5);
-
- ASSERT_EQ(*(span1.data() + 2), *(span1.data() + 2));
-
- ContiguousSpan<float> span3(span2.offset(1));
- ASSERT_EQ(span3.size(), 4);
- ASSERT_EQ(span3.at(0), 2);
- ASSERT_EQ(span3.at(1), 3);
- ASSERT_EQ(span3.at(2), 4);
- ASSERT_EQ(span3.at(3), 5);
-}
diff --git a/runtime/onert/CMakeLists.txt b/runtime/onert/CMakeLists.txt
index 88d52a5bd..3c9ca99da 100644
--- a/runtime/onert/CMakeLists.txt
+++ b/runtime/onert/CMakeLists.txt
@@ -7,9 +7,3 @@ add_subdirectory(frontend)
add_subdirectory(core)
add_subdirectory(api)
add_subdirectory(sample)
-
-if(NOT ENABLE_TEST)
- return()
-endif(NOT ENABLE_TEST)
-
-add_subdirectory(test)
diff --git a/runtime/onert/api/CMakeLists.txt b/runtime/onert/api/CMakeLists.txt
index beb243a4d..badd5d133 100644
--- a/runtime/onert/api/CMakeLists.txt
+++ b/runtime/onert/api/CMakeLists.txt
@@ -10,6 +10,7 @@ set(NNFW_API_HEADERS include/nnfw.h include/nnfw_experimental.h)
target_link_libraries(${ONERT_DEV} PUBLIC nnfw-nnapi-header)
target_link_libraries(${ONERT_DEV} PRIVATE onert_core)
+target_link_libraries(${ONERT_DEV} PRIVATE nnfw_lib_misc)
target_link_libraries(${ONERT_DEV} PRIVATE jsoncpp tflite_loader circle_loader ${LIB_PTHREAD})
target_link_libraries(${ONERT_DEV} PRIVATE trix_loader)
target_link_libraries(${ONERT_DEV} PRIVATE nnfw_common)
diff --git a/runtime/onert/api/include/nnfw.h b/runtime/onert/api/include/nnfw.h
index 6f296a931..658cba4d5 100644
--- a/runtime/onert/api/include/nnfw.h
+++ b/runtime/onert/api/include/nnfw.h
@@ -193,7 +193,7 @@ typedef struct nnfw_tensorinfo
* And inference is performed after {@link nnfw_run} is invoked.
*
* <p>{@link nnfw_close_session} should be called once
- * if session is no longer need
+ * if session is no longer needed
*
* @param[out] session The session to be created
* @return NNFW_STATUS_NO_ERROR if successful
@@ -213,7 +213,7 @@ NNFW_STATUS nnfw_close_session(nnfw_session *session);
/**
* @brief Load model from nnpackage file or directory
*
- * The length of \p package_file_path must not execeed 1024 bytes including zero at the end.
+ * The length of \p package_file_path must not exceed 1024 bytes including zero at the end.
*
* @param[in] session nnfw_session loading the given nnpackage file/dir
* @param[in] package_file_path Path to the nnpackage file or unzipped directory to be loaded
diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h
index 45b34716a..2fbb96f31 100644
--- a/runtime/onert/api/include/nnfw_version.h
+++ b/runtime/onert/api/include/nnfw_version.h
@@ -21,6 +21,6 @@
* NNFW_VERSION is a uint32 value representing nnfw runtime version
* in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
*/
-#define NNFW_VERSION 0x01001400
+#define NNFW_VERSION 0x01001500
#endif // __NNFW_VERSION_H__
diff --git a/runtime/onert/api/src/nnfw_api.cc b/runtime/onert/api/src/nnfw_api.cc
index 0ebd385e9..a0e6ee094 100644
--- a/runtime/onert/api/src/nnfw_api.cc
+++ b/runtime/onert/api/src/nnfw_api.cc
@@ -58,15 +58,7 @@ STATIC_ASSERT_ENUM_CHECK(NNFW_INFO_ID_VERSION, 0);
* @param session the session to be created
* @return NNFW_STATUS_NO_ERROR if successful
*/
-NNFW_STATUS nnfw_create_session(nnfw_session **session)
-{
- NNFW_RETURN_ERROR_IF_NULL(session);
-
- *session = new (std::nothrow) nnfw_session();
- if (*session == nullptr)
- return NNFW_STATUS_OUT_OF_MEMORY;
- return NNFW_STATUS_NO_ERROR;
-}
+NNFW_STATUS nnfw_create_session(nnfw_session **session) { return nnfw_session::create(session); }
/*
* Close a session instance
diff --git a/runtime/onert/api/src/nnfw_api_internal.cc b/runtime/onert/api/src/nnfw_api_internal.cc
index 62a043921..9b43dd381 100644
--- a/runtime/onert/api/src/nnfw_api_internal.cc
+++ b/runtime/onert/api/src/nnfw_api_internal.cc
@@ -25,6 +25,7 @@
#include "tflite_loader.h"
#include "trix_loader.h"
#include "json/json.h"
+#include "ir/NNPkg.h"
#include "ir/OpCode.h"
#include "util/TracingCtx.h"
@@ -110,9 +111,7 @@ std::string trim(const std::string &value)
return value.substr(begin, range);
}
-using CfgKeyValues = std::unordered_map<std::string, std::string>;
-
-bool loadConfigure(const std::string cfgfile, CfgKeyValues &keyValues)
+bool loadConfigure(const std::string cfgfile, onert::util::CfgKeyValues &keyValues)
{
std::ifstream ifs(cfgfile);
if (ifs.is_open())
@@ -143,19 +142,6 @@ bool loadConfigure(const std::string cfgfile, CfgKeyValues &keyValues)
return false;
}
-void setConfigKeyValues(const CfgKeyValues &keyValues)
-{
- auto configsrc = std::make_unique<onert::util::GeneralConfigSource>();
-
- for (auto it = keyValues.begin(); it != keyValues.end(); ++it)
- {
- VERBOSE(NNPKG_CONFIGS) << "(" << it->first << ") = (" << it->second << ")" << std::endl;
- configsrc->set(it->first, it->second);
- }
-
- onert::util::config_source_ext(std::move(configsrc));
-}
-
NNFW_TYPE datatype_to_nnfw_dtype(onert::ir::DataType dt)
{
using onert::ir::DataType;
@@ -195,15 +181,59 @@ void fillTensorInfo(nnfw_tensorinfo *ti, const onert::ir::Shape &shape,
ti->dtype = datatype_to_nnfw_dtype(dtype);
}
+std::unique_ptr<onert::ir::Model> loadModel(const std::string filename,
+ const std::string model_type)
+{
+ if (model_type == "tflite")
+ return onert::tflite_loader::loadModel(filename.c_str());
+ if (model_type == "circle")
+ return onert::circle_loader::loadModel(filename.c_str());
+ if (model_type == "tvn")
+ return onert::trix_loader::loadModel(filename.c_str());
+
+ std::cerr << "Unsupported model type" << std::endl;
+ return std::unique_ptr<onert::ir::Model>(nullptr);
+}
+
} // namespace
nnfw_session::nnfw_session()
- : _subgraphs{nullptr}, _compiler{nullptr}, _execution{nullptr},
- _kernel_registry{std::make_shared<onert::api::CustomKernelRegistry>()}, _tracing_ctx{nullptr}
+ : _nnpkg{nullptr}, _coptions{}, _compiler_artifact{nullptr}, _execution{nullptr},
+ _kernel_registry{nullptr}
{
// DO NOTHING
}
+NNFW_STATUS nnfw_session::create(nnfw_session **session)
+{
+ if (session == nullptr)
+ return NNFW_STATUS_UNEXPECTED_NULL;
+
+ // Create session
+ *session = new (std::nothrow) nnfw_session();
+ if (*session == nullptr)
+ {
+ std::cerr << "Error during session creation" << std::endl;
+ return NNFW_STATUS_OUT_OF_MEMORY;
+ }
+
+ // Initialize fields
+ try
+ {
+ (*session)->_kernel_registry = std::make_shared<onert::api::CustomKernelRegistry>();
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << "Error during session initialization : " << e.what() << std::endl;
+ delete *session;
+ *session = nullptr;
+
+ return NNFW_STATUS_ERROR;
+ }
+
+ return NNFW_STATUS_NO_ERROR;
+}
+
nnfw_session::~nnfw_session() = default;
NNFW_STATUS nnfw_session::load_circle_from_buffer(uint8_t *buffer, size_t size)
@@ -219,19 +249,16 @@ NNFW_STATUS nnfw_session::load_circle_from_buffer(uint8_t *buffer, size_t size)
try
{
- _subgraphs = onert::circle_loader::loadModel(buffer, size);
+ auto model = onert::circle_loader::loadModel(buffer, size);
+ _nnpkg = std::make_shared<onert::ir::NNPkg>(std::move(model));
+ _coptions.push_back(onert::compiler::CompilerOptions::fromGlobalConfig());
+ _state = State::MODEL_LOADED;
}
catch (const std::exception &e)
{
std::cerr << "Error during model loading : " << e.what() << std::endl;
return NNFW_STATUS_ERROR;
}
-
- _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get());
-
- _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get());
-
- _state = State::MODEL_LOADED;
return NNFW_STATUS_NO_ERROR;
}
@@ -247,45 +274,28 @@ NNFW_STATUS nnfw_session::load_model_from_modelfile(const char *model_file_path)
}
std::string filename{model_file_path};
- if (filename.size() < 8) // .tflite or .circle
+ // TODO: Use std::filesystem::path when we can use c++17.
+ auto dotidx = filename.find_last_of('.');
+ if (dotidx == std::string::npos)
{
- std::cerr << "Invalid model file path." << std::endl;
+ std::cerr << "Invalid model file path. Please use file with extension." << std::endl;
return NNFW_STATUS_ERROR;
}
-
- std::string model_type = filename.substr(filename.size() - 7, 7);
-
+ std::string model_type = filename.substr(dotidx + 1); // + 1 to exclude dot
try
{
- if (model_type == ".tflite")
- {
- _subgraphs = onert::tflite_loader::loadModel(filename.c_str());
- }
- else if (model_type == ".circle")
- {
- _subgraphs = onert::circle_loader::loadModel(filename.c_str());
- }
- else if (model_type == ".tvn")
- {
- _subgraphs = onert::trix_loader::loadModel(filename.c_str());
- }
- else
- {
- std::cerr << "Unsupported model type" << std::endl;
+ auto model = loadModel(filename, model_type);
+ if (model == nullptr)
return NNFW_STATUS_ERROR;
- }
+ _nnpkg = std::make_shared<onert::ir::NNPkg>(std::move(model));
+ _coptions.push_back(onert::compiler::CompilerOptions::fromGlobalConfig());
+ _state = State::MODEL_LOADED;
}
catch (const std::exception &e)
{
std::cerr << "Error during model loading : " << e.what() << std::endl;
return NNFW_STATUS_ERROR;
}
-
- _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get());
-
- _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get());
-
- _state = State::MODEL_LOADED;
return NNFW_STATUS_NO_ERROR;
}
@@ -334,45 +344,59 @@ NNFW_STATUS nnfw_session::load_model_from_nnpackage(const char *package_dir)
{
auto filepath = package_path + std::string("/metadata/") + configs[0].asString();
- CfgKeyValues keyValues;
+ onert::util::CfgKeyValues keyValues;
if (loadConfigure(filepath, keyValues))
{
- setConfigKeyValues(keyValues);
+ onert::util::setConfigKeyValues(keyValues);
}
}
-
- auto model_file_path = package_path + std::string("/") + models[0].asString(); // first model
- auto model_type = model_types[0].asString(); // first model's type
- if (model_type == "tflite")
+ _nnpkg = std::make_shared<onert::ir::NNPkg>();
+ for (uint32_t i = 0; i < models.size(); ++i)
{
- _subgraphs = onert::tflite_loader::loadModel(model_file_path);
- }
- else if (model_type == "circle")
- {
- _subgraphs = onert::circle_loader::loadModel(model_file_path);
- }
- else if (model_type == "tvn")
- {
- _subgraphs = onert::trix_loader::loadModel(model_file_path);
+ auto model_file_path = package_path + std::string("/") + models[i].asString();
+ auto model_type = model_types[i].asString();
+ auto model = loadModel(model_file_path, model_type);
+ if (model == nullptr)
+ return NNFW_STATUS_ERROR;
+ model->primary_subgraph()->bindKernelBuilder(_kernel_registry->getBuilder());
+ _nnpkg->push(onert::ir::ModelIndex{i}, std::move(model));
+ _coptions.push_back(onert::compiler::CompilerOptions::fromGlobalConfig());
}
- else
+
+ auto toIODesc = [](std::string str) {
+ auto indices = nnfw::misc::split(str, ':');
+ if (indices.size() != 3)
+ {
+ std::cerr << "IODesc should be 3-tuple." << std::endl;
+ return onert::ir::IODesc{};
+ }
+ auto model_idx = static_cast<uint32_t>(std::stoi(indices.at(0)));
+ auto subgraph_idx = static_cast<uint32_t>(std::stoi(indices.at(1)));
+ auto operand_idx = static_cast<uint32_t>(std::stoi(indices.at(2)));
+ return onert::ir::IODesc{model_idx, subgraph_idx, operand_idx};
+ };
+ // read pkg-inputs and pkg-outputs
+ const Json::Value &pkg_inputs = root["pkg-inputs"];
+ for (uint32_t i = 0; i < pkg_inputs.size(); ++i)
+ _nnpkg->addInput(toIODesc(pkg_inputs[i].asString()));
+ const Json::Value &pkg_outputs = root["pkg-outputs"];
+ for (uint32_t i = 0; i < pkg_outputs.size(); ++i)
+ _nnpkg->addOutput(toIODesc(pkg_outputs[i].asString()));
+ // read model-connect
+ const Json::Value &fromtos = root["model-connect"];
+ for (uint32_t i = 0; i < fromtos.size(); ++i)
{
- std::cerr << "Unsupported model type in MANIFEST" << std::endl;
- return NNFW_STATUS_ERROR;
+ const Json::Value &tos = fromtos[i]["to"];
+ for (uint32_t j = 0; j < tos.size(); ++j)
+ _nnpkg->addEdge(toIODesc(fromtos[i]["from"].asString()), toIODesc(tos[j].asString()));
}
- _subgraphs->primary()->bindKernelBuilder(_kernel_registry->getBuilder());
+ _state = State::MODEL_LOADED;
}
catch (const std::exception &e)
{
std::cerr << "Error during model loading : " << e.what() << std::endl;
return NNFW_STATUS_ERROR;
}
-
- _tracing_ctx = std::make_unique<onert::util::TracingCtx>(_subgraphs.get());
-
- _compiler = std::make_unique<onert::compiler::Compiler>(_subgraphs, _tracing_ctx.get());
-
- _state = State::MODEL_LOADED;
return NNFW_STATUS_NO_ERROR;
}
@@ -396,9 +420,17 @@ NNFW_STATUS nnfw_session::prepare()
try
{
- _subgraphs.reset();
- std::shared_ptr<onert::exec::ExecutorMap> executors = _compiler->compile();
- _execution = std::make_unique<onert::exec::Execution>(executors);
+ // TODO: Compile all models in case of multiple models
+ if (_nnpkg->model_count() > 2)
+ {
+ std::cerr << "Error during model prepare : more than 3 multiple models are not supported yet."
+ << std::endl;
+ return NNFW_STATUS_ERROR;
+ }
+ auto compiler = std::make_unique<onert::compiler::Compiler>(_nnpkg, _coptions);
+ _nnpkg.reset();
+ _compiler_artifact = compiler->compile();
+ _execution = std::make_unique<onert::exec::Execution>(_compiler_artifact->_executors);
}
catch (const std::exception &e)
{
@@ -430,13 +462,14 @@ NNFW_STATUS nnfw_session::prepare_pipeline(const char *map_file_path)
try
{
- _subgraphs.reset();
- std::vector<std::shared_ptr<onert::exec::ExecutorMap>> executor_maps =
- _compiler->compile(_package_file_path.c_str(), map_file_path);
+ auto model = _nnpkg->primary_model();
+ auto compiler = std::make_unique<onert::compiler::Compiler>(model, *_coptions[0]);
+ _nnpkg.reset();
+ auto artifacts = compiler->compile(_package_file_path.c_str(), map_file_path);
- for (auto it = executor_maps.begin(); it != executor_maps.end(); ++it)
+ for (auto it = artifacts.begin(); it != artifacts.end(); ++it)
{
- _executions.push_back(std::make_shared<onert::exec::Execution>(*it));
+ _executions.push_back(std::make_shared<onert::exec::Execution>(it->get()->_executors));
}
make_dependency();
_threads.resize(_executions.size());
@@ -740,7 +773,8 @@ NNFW_STATUS nnfw_session::apply_tensorinfo(uint32_t index, nnfw_tensorinfo ti)
{
// In this case, if we apply input shape in primary_subgraph, it will propagate after
// compilation and excution
- auto primary_subgraph = _subgraphs->primary();
+ auto model = _nnpkg->primary_model();
+ auto primary_subgraph = model->primary_subgraph();
auto ind = primary_subgraph->getInputs().at(index);
auto &input = primary_subgraph->operands().at(ind);
@@ -851,12 +885,12 @@ void nnfw_session::make_dependency()
{
for (uint32_t out_exe = 0; out_exe < _executions.size(); out_exe++)
{
- auto out_graph = _executions[out_exe]->primary_subgraph();
+ auto &out_graph = _executions[out_exe]->primary_subgraph();
for (uint32_t in_exe = 0; in_exe < _executions.size(); in_exe++)
{
if (out_exe == in_exe)
continue;
- auto in_graph = _executions[in_exe]->primary_subgraph();
+ auto &in_graph = _executions[in_exe]->primary_subgraph();
for (auto out = out_graph._name_to_output_begin(); out != out_graph._name_to_output_end();
out++)
{
@@ -971,7 +1005,7 @@ NNFW_STATUS nnfw_session::set_available_backends(const char *backends)
if (null_terminating(backends, MAX_BACKEND_NAME_LENGTH) == false)
return NNFW_STATUS_ERROR;
- auto &options = _compiler->options();
+ auto &options = *_coptions[0];
using namespace onert::util;
@@ -1005,7 +1039,7 @@ NNFW_STATUS nnfw_session::set_op_backend(const char *op, const char *backend)
return NNFW_STATUS_ERROR;
}
- auto &opcode_to_backend = _compiler->options().manual_scheduler_options.opcode_to_backend;
+ auto &opcode_to_backend = _coptions[0]->manual_scheduler_options.opcode_to_backend;
opcode_to_backend.emplace(onert::ir::toOpCode(key), backend);
}
catch (const std::exception &e)
@@ -1024,7 +1058,7 @@ NNFW_STATUS nnfw_session::set_config(const char *key, const char *value)
if (!key || !value)
return NNFW_STATUS_UNEXPECTED_NULL;
- auto &options = _compiler->options();
+ auto &options = *_coptions[0];
using namespace onert::util;
@@ -1067,14 +1101,14 @@ NNFW_STATUS nnfw_session::set_config(const char *key, const char *value)
const onert::ir::Graph *nnfw_session::primary_subgraph()
{
- if (_subgraphs)
+ if (_nnpkg != nullptr)
{
- assert(!_execution && _executions.empty());
- return _subgraphs->primary().get();
+ assert(_execution == nullptr && _executions.empty());
+ return _nnpkg->primary_model()->primary_subgraph().get();
}
else
{
- assert(_execution || !_executions.empty());
+ assert(_execution != nullptr || !_executions.empty());
// TODO Remove const_cast
// We assumed the graph will not change after compilation, but shape could change
if (!_executions.empty())
@@ -1094,7 +1128,7 @@ NNFW_STATUS nnfw_session::get_config(const char *key, char *value, size_t value_
if (!key || !value)
return NNFW_STATUS_UNEXPECTED_NULL;
- auto &options = _compiler->options();
+ auto &options = *_coptions[0];
auto check_boundary = [](size_t dest_size, std::string &src) {
if (dest_size < src.length() + 1 /* for '\0' */)
@@ -1138,9 +1172,9 @@ bool nnfw_session::isStateInitialized()
{
if (_state == State::INITIALIZED)
{
- assert(!_subgraphs);
- assert(!_compiler);
- assert(!_execution && _executions.empty());
+ assert(_nnpkg == nullptr);
+ assert(_coptions.empty());
+ assert(_execution == nullptr && _executions.empty());
return true;
}
else
@@ -1153,9 +1187,9 @@ bool nnfw_session::isStateModelLoaded()
{
if (_state == State::MODEL_LOADED)
{
- assert(_subgraphs);
- assert(_compiler);
- assert(!_execution && _executions.empty());
+ assert(_nnpkg != nullptr);
+ assert(!_coptions.empty());
+ assert(_execution == nullptr && _executions.empty());
return true;
}
else
@@ -1168,9 +1202,9 @@ bool nnfw_session::isStatePrepared()
{
if (_state == State::PREPARED)
{
- assert(!_subgraphs);
- assert(_compiler);
- assert(_execution || !_executions.empty());
+ assert(_nnpkg == nullptr);
+ assert(!_coptions.empty());
+ assert(_execution != nullptr || !_executions.empty());
return true;
}
else
@@ -1183,9 +1217,9 @@ bool nnfw_session::isStateRunning()
{
if (_state == State::RUNNING)
{
- assert(!_subgraphs);
- assert(_compiler);
- assert(_execution || !_executions.empty());
+ assert(_nnpkg == nullptr);
+ assert(!_coptions.empty());
+ assert(_execution != nullptr || !_executions.empty());
return true;
}
return false;
@@ -1195,9 +1229,9 @@ bool nnfw_session::isStateFinishedRun()
{
if (_state == State::FINISHED_RUN)
{
- assert(!_subgraphs);
- assert(_compiler);
- assert(_execution || !_executions.empty());
+ assert(_nnpkg == nullptr);
+ assert(!_coptions.empty());
+ assert(_execution != nullptr || !_executions.empty());
return true;
}
else
@@ -1224,9 +1258,14 @@ NNFW_STATUS nnfw_session::output_tensorindex(const char *tensorname, uint32_t *i
NNFW_STATUS nnfw_session::set_backends_per_operation(const char *backend_settings)
{
if (backend_settings == NULL)
- {
return NNFW_STATUS_ERROR;
- }
- _compiler->set_backend_from_str(backend_settings);
+
+ if (!isStateModelLoaded())
+ return NNFW_STATUS_INVALID_STATE;
+
+ // Backend for all
+ auto &ms_options = _coptions[0]->manual_scheduler_options;
+ ms_options.setBackendMap(std::string{backend_settings});
+
return NNFW_STATUS_NO_ERROR;
}
diff --git a/runtime/onert/api/src/nnfw_api_internal.h b/runtime/onert/api/src/nnfw_api_internal.h
index 6d75d894f..9b729fd5f 100644
--- a/runtime/onert/api/src/nnfw_api_internal.h
+++ b/runtime/onert/api/src/nnfw_api_internal.h
@@ -20,7 +20,6 @@
#include "nnfw.h"
#include "nnfw_experimental.h"
-#include <util/GeneralConfigSource.h>
#include <util/TracingCtx.h>
#include <string>
@@ -41,11 +40,13 @@ class Execution;
namespace ir
{
class Graph;
-class Subgraphs;
+class Model;
+class NNPkg;
} // namespace ir
namespace compiler
{
-class Compiler;
+struct CompilerArtifact;
+class CompilerOptions;
} // namespace compiler
} // namespace onert
@@ -97,9 +98,18 @@ private:
};
public:
+ /**
+ * @brief Factory method. It creates and initialize nnfw_session
+ *
+ * @note Use factory instead of constructor to get status
+ */
+ static NNFW_STATUS create(nnfw_session **session);
+
+private:
nnfw_session();
- ~nnfw_session();
+public:
+ ~nnfw_session();
NNFW_STATUS load_model_from_nnpackage(const char *package_file_path);
NNFW_STATUS prepare();
NNFW_STATUS prepare_pipeline(const char *map_file_path);
@@ -148,6 +158,10 @@ public:
NNFW_STATUS register_custom_operation(const std::string &id, nnfw_custom_eval eval_func);
NNFW_STATUS input_tensorindex(const char *tensorname, uint32_t *index);
NNFW_STATUS output_tensorindex(const char *tensorname, uint32_t *index);
+ /**
+ * @brief Set backends with string-encoded mapping from operation index to backend type
+ * (cpu, acl_cl)
+ */
NNFW_STATUS set_backends_per_operation(const char *backend_settings);
private:
@@ -161,15 +175,14 @@ private:
private:
State _state{State::INITIALIZED};
- std::shared_ptr<onert::ir::Subgraphs> _subgraphs;
- std::unique_ptr<onert::compiler::Compiler> _compiler;
+ std::shared_ptr<onert::ir::NNPkg> _nnpkg;
+ std::vector<std::unique_ptr<onert::compiler::CompilerOptions>> _coptions;
+ std::shared_ptr<onert::compiler::CompilerArtifact> _compiler_artifact;
std::unique_ptr<onert::exec::Execution> _execution;
std::shared_ptr<onert::api::CustomKernelRegistry> _kernel_registry;
std::vector<std::thread> _threads;
std::vector<std::shared_ptr<onert::exec::Execution>> _executions;
std::string _package_file_path;
-
- std::unique_ptr<onert::util::TracingCtx> _tracing_ctx;
};
#endif // __API_NNFW_API_INTERNAL_H__
diff --git a/runtime/onert/backend/acl_cl/Backend.h b/runtime/onert/backend/acl_cl/Backend.h
index 945ad83bb..301ded01f 100644
--- a/runtime/onert/backend/acl_cl/Backend.h
+++ b/runtime/onert/backend/acl_cl/Backend.h
@@ -46,8 +46,10 @@ public:
{
const auto &graph = *data.graph;
const auto &operands = data.graph->operands();
+ const auto is_linear_executor = data.is_linear_executor;
+
auto context = std::make_unique<acl_cl::BackendContext>(this, std::move(data));
- auto tm = createTensorManager(data.is_linear_executor);
+ auto tm = createTensorManager(is_linear_executor);
auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
auto tb = std::make_shared<TensorBuilder>(operands, tm);
context->tensor_registry = tr;
diff --git a/runtime/onert/backend/acl_neon/Backend.h b/runtime/onert/backend/acl_neon/Backend.h
index 62b163b11..1c7713055 100644
--- a/runtime/onert/backend/acl_neon/Backend.h
+++ b/runtime/onert/backend/acl_neon/Backend.h
@@ -46,8 +46,10 @@ public:
{
const auto &graph = *data.graph;
const auto &operands = data.graph->operands();
+ const auto is_linear_executor = data.is_linear_executor;
+
auto context = std::make_unique<acl_neon::BackendContext>(this, std::move(data));
- auto tm = createTensorManager(data.is_linear_executor);
+ auto tm = createTensorManager(is_linear_executor);
auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
auto tb = std::make_shared<TensorBuilder>(operands, tm);
context->tensor_registry = tr;
diff --git a/runtime/onert/backend/cpu/CMakeLists.txt b/runtime/onert/backend/cpu/CMakeLists.txt
index b61e58251..99643b983 100644
--- a/runtime/onert/backend/cpu/CMakeLists.txt
+++ b/runtime/onert/backend/cpu/CMakeLists.txt
@@ -6,7 +6,7 @@ file(GLOB_RECURSE SOURCES "*.cc")
add_library(${LIB_ONERT_BACKEND_CPU} SHARED ${SOURCES})
-target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_lib_cker)
+target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_lib_cker nnfw_lib_misc)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE onert_core)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_common)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_coverage)
diff --git a/runtime/onert/backend/cpu/ExternalContext.h b/runtime/onert/backend/cpu/ExternalContext.h
index ab0bb5f10..6ed4799a8 100644
--- a/runtime/onert/backend/cpu/ExternalContext.h
+++ b/runtime/onert/backend/cpu/ExternalContext.h
@@ -20,6 +20,8 @@
#include <util/ConfigSource.h>
#include <ruy/context.h>
+#include <memory>
+
namespace onert
{
namespace backend
diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc
index 75274dc88..762ee7392 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.cc
+++ b/runtime/onert/backend/cpu/KernelGenerator.cc
@@ -244,17 +244,13 @@ std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationI
assert(_tensor_builder->dynamicTensorManager());
assert(_tensor_reg);
- auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
-
// Prepare to handle dynamic tensors later
auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
{
- dyn_ctx->op_ind = ind;
- dyn_ctx->operations = &_operations_ctx;
- dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
-
- ret->dynamic_tensor_ctx(dyn_ctx);
+ dyn_ctx->op = &_operations_ctx.at(ind);
+ dyn_ctx->dynamic_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
}
+ ret->dynamic_tensor_ctx(dyn_ctx);
auto &op = _graph.operations().at(ind);
op.accept(*this);
diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
index 2255d5e9f..4672fe406 100644
--- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
@@ -63,7 +63,7 @@ void ConvolutionLayer::convFloat32()
getBuffer<float>(_output));
}
-void ConvolutionLayer::convQuant8()
+void ConvolutionLayer::convQ8uPerTensor()
{
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
@@ -99,7 +99,33 @@ void ConvolutionLayer::convQuant8()
getBuffer<uint8_t>(_output));
}
-void ConvolutionLayer::convQuant8PerChannel()
+void ConvolutionLayer::convQ8uPerChannel()
+{
+ nnfw::cker::ConvParams op_params;
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = _dilationWidthFactor;
+ op_params.dilation_height_factor = _dilationHeightFactor;
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.output_offset = _output->data_zero_point();
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ // NOTE: The following fields of ConvParams are not used:
+ // padding_type, weights_offset, output_{multiplier,shift}, float_activation_{min,max}
+
+ nnfw::cker::Conv &kernel = *_conv_kernel;
+ kernel(op_params, getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel),
+ getBuffer<uint8_t>(_kernel), _kernel->data_zero_points().data(), getShape(_bias),
+ getBuffer<int32_t>(_bias), getShape(_output), getBuffer<uint8_t>(_output));
+}
+
+void ConvolutionLayer::convQ8i()
{
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
@@ -189,11 +215,15 @@ void ConvolutionLayer::run()
}
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
- convQuant8();
+ const bool per_channel_quantized = _kernel->data_scales().size() > 1;
+ if (per_channel_quantized)
+ convQ8uPerChannel();
+ else
+ convQ8uPerTensor();
}
else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
{
- convQuant8PerChannel();
+ convQ8i();
}
else
{
@@ -210,8 +240,8 @@ void ConvolutionLayer::prepare()
if (_input->data_type() == OperandType::FLOAT32 && _kernel->is_constant())
{
bool is_transposed = false;
- kernel.prepare(getShape(_kernel), getBuffer<float>(_kernel), getPaddingType(_paddingType),
- is_transposed, _dilationWidthFactor, _dilationHeightFactor);
+ kernel.prepareF32(getShape(_kernel), getBuffer<float>(_kernel), getPaddingType(_paddingType),
+ is_transposed, _dilationWidthFactor, _dilationHeightFactor);
// Decrease reference of _kernel(weights) only when _kernel is constant
if (is_transposed)
@@ -225,8 +255,20 @@ void ConvolutionLayer::prepare()
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM && _kernel->is_constant() &&
!_input->is_dynamic() && !_output->is_dynamic())
{
- kernel.prepareQuant(getShape(_input), getShape(_kernel), getShape(_output), _strideWidth,
- _strideHeight, _dilationWidthFactor, _dilationHeightFactor);
+ const bool per_channel_quantized = _kernel->data_scales().size() > 1;
+ if (per_channel_quantized)
+ {
+ GetQuantizedConvolutionMultipliersAndShifts(
+ _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
+ _kernel->data_scales().size(), getShape(_kernel).Dims(0),
+ kernel.per_channel_output_multiplier(), kernel.per_channel_output_shift());
+ }
+ else
+ {
+ kernel.prepareQ8uPerTensor(getShape(_input), getShape(_kernel), getShape(_output),
+ _strideWidth, _strideHeight, _dilationWidthFactor,
+ _dilationHeightFactor);
+ }
}
else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
{
diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
index 5d7f7c296..9f5253c8e 100644
--- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
+++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
@@ -50,9 +50,10 @@ public:
public:
void convFloat32();
- void convQuant8();
+ void convQ8uPerTensor();
+ void convQ8uPerChannel();
- void convQuant8PerChannel();
+ void convQ8i();
void configure(const IPortableTensor *input, const IPortableTensor *kernel,
const IPortableTensor *bias, ir::PaddingType _paddingType,
diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
index 30641ecae..8a48497d5 100644
--- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
+++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.cc
@@ -49,7 +49,7 @@ void DepthwiseConvolutionLayer::convFloat32()
getBuffer<float>(_output), _external_context->ruy_context());
}
-void DepthwiseConvolutionLayer::convQuant8()
+void DepthwiseConvolutionLayer::convQ8uPerTensor()
{
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
@@ -84,11 +84,39 @@ void DepthwiseConvolutionLayer::convQuant8()
getBuffer<uint8_t>(_output), _external_context->ruy_context());
}
-void DepthwiseConvolutionLayer::convQuant8PerChannel()
+void DepthwiseConvolutionLayer::convQ8uPerChannel()
+{
+ nnfw::cker::DepthwiseConvParams op_params;
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = _dilationWidth;
+ op_params.dilation_height_factor = _dilationHeight;
+ op_params.depth_multiplier = _multiplier;
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.output_offset = _output->data_zero_point();
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ // NOTE: The following fields of ConvParams are not used:
+ // padding_type, weights_offset, output_{multiplier,shift}, float_activation_{min,max}
+
+ nnfw::cker::reference_integer_ops::DepthwiseConvPerChannel(
+ op_params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
+ getShape(_input), getBuffer<uint8_t>(_input), getShape(_kernel), getBuffer<uint8_t>(_kernel),
+ _kernel->data_zero_points().data(), getShape(_bias), getBuffer<int32_t>(_bias),
+ getShape(_output), getBuffer<uint8_t>(_output));
+}
+
+void DepthwiseConvolutionLayer::convQ8i()
{
if (!_prepared)
{
- prepareQuant8PerChannel();
+ prepareQ8i();
_prepared = true;
}
@@ -119,7 +147,15 @@ void DepthwiseConvolutionLayer::convQuant8PerChannel()
_external_context->ruy_context());
}
-void DepthwiseConvolutionLayer::prepareQuant8PerChannel()
+void DepthwiseConvolutionLayer::prepareQ8i()
+{
+ GetQuantizedConvolutionMultipliersAndShifts(
+ _input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
+ _kernel->data_scales().size(), getShape(_kernel).Dims(3), _per_channel_output_multiplier,
+ _per_channel_output_shift);
+}
+
+void DepthwiseConvolutionLayer::prepareQ8uPerChannel()
{
GetQuantizedConvolutionMultipliersAndShifts(
_input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
@@ -155,7 +191,17 @@ void DepthwiseConvolutionLayer::configure(
{
if (_kernel->is_constant() && !_input->is_dynamic() && !_output->is_dynamic())
{
- prepareQuant8PerChannel();
+ prepareQ8i();
+ _prepared = true;
+ }
+ }
+ else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM && _kernel->is_constant() &&
+ !_input->is_dynamic() && !_output->is_dynamic())
+ {
+ const bool per_channel_quantized = _kernel->data_scales().size() > 1;
+ if (per_channel_quantized)
+ {
+ prepareQ8uPerChannel();
_prepared = true;
}
}
@@ -169,11 +215,15 @@ void DepthwiseConvolutionLayer::run()
}
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
- convQuant8();
+ const bool per_channel_quantized = _kernel->data_scales().size() > 1;
+ if (per_channel_quantized)
+ convQ8uPerChannel();
+ else
+ convQ8uPerTensor();
}
else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
{
- convQuant8PerChannel();
+ convQ8i();
}
else
{
diff --git a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h
index 720550636..5c910109a 100644
--- a/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h
+++ b/runtime/onert/backend/cpu/ops/DepthwiseConvolutionLayer.h
@@ -40,9 +40,10 @@ public:
public:
void convFloat32();
- void convQuant8();
+ void convQ8uPerTensor();
+ void convQ8uPerChannel();
- void convQuant8PerChannel();
+ void convQ8i();
void configure(const IPortableTensor *input, const IPortableTensor *kernel,
const IPortableTensor *bias, const uint32_t paddingLeft,
@@ -55,7 +56,8 @@ public:
void run() override;
private:
- void prepareQuant8PerChannel();
+ void prepareQ8i();
+ void prepareQ8uPerChannel();
private:
const IPortableTensor *_input{nullptr};
diff --git a/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc
index 8a6fe6504..d89741c86 100644
--- a/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc
+++ b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc
@@ -121,7 +121,9 @@ Array<const CornerBox> decodeBoxes(const Array<float> &raw_boxes, const Array<fl
assert(box.y2 > box.y1);
}
- return array_cast<const CornerBox>(std::move(decoded_boxes_a), decoded_boxes_a.shape());
+ auto decoded_boxes_a_shape = decoded_boxes_a.shape();
+
+ return array_cast<const CornerBox>(std::move(decoded_boxes_a), decoded_boxes_a_shape);
}
}
diff --git a/runtime/onert/backend/ruy/ExternalContext.h b/runtime/onert/backend/ruy/ExternalContext.h
index 3cc4eaa5a..c73ae636e 100644
--- a/runtime/onert/backend/ruy/ExternalContext.h
+++ b/runtime/onert/backend/ruy/ExternalContext.h
@@ -20,6 +20,8 @@
#include <util/ConfigSource.h>
#include <ruy/context.h>
+#include <memory>
+
namespace onert
{
namespace backend
diff --git a/runtime/onert/backend/ruy/KernelGenerator.cc b/runtime/onert/backend/ruy/KernelGenerator.cc
index c2f6a1f79..b2bbf9bfc 100644
--- a/runtime/onert/backend/ruy/KernelGenerator.cc
+++ b/runtime/onert/backend/ruy/KernelGenerator.cc
@@ -42,17 +42,13 @@ std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationI
assert(_tensor_builder->dynamicTensorManager());
assert(_tensor_reg);
- auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
-
// Prepare to handle dynamic tensors later
auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
{
- dyn_ctx->op_ind = ind;
- dyn_ctx->operations = &_operations_ctx;
- dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
-
- ret->dynamic_tensor_ctx(dyn_ctx);
+ dyn_ctx->op = &_operations_ctx.at(ind);
+ dyn_ctx->dynamic_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
}
+ ret->dynamic_tensor_ctx(dyn_ctx);
auto &op = _graph.operations().at(ind);
op.accept(*this);
diff --git a/runtime/onert/backend/trix/CMakeLists.txt b/runtime/onert/backend/trix/CMakeLists.txt
index 5455757ca..a94be247d 100644
--- a/runtime/onert/backend/trix/CMakeLists.txt
+++ b/runtime/onert/backend/trix/CMakeLists.txt
@@ -1,6 +1,6 @@
set(LIB_ONERT_BACKEND_TRIX onert_backend_trix)
-nnfw_find_package(TRIXEngine EXACT 2.5.0 QUIET)
+nnfw_find_package(TRIXEngine QUIET 2.5.0)
if(NOT TRIXEngine_FOUND)
return()
endif(NOT TRIXEngine_FOUND)
diff --git a/runtime/onert/backend/trix/DevContext.h b/runtime/onert/backend/trix/DevContext.h
index 482932fd4..a7dbd7a59 100644
--- a/runtime/onert/backend/trix/DevContext.h
+++ b/runtime/onert/backend/trix/DevContext.h
@@ -32,28 +32,42 @@ public:
DevContext()
{
auto device_count = getnumNPUdeviceByType(NPUCOND_TRIV2_CONN_SOCIP);
+ // TODO: x64 platform has 3 cores. We do not support more than 2 cores for now.
+ if (device_count > 2)
+ {
+ device_count = 2;
+ }
+
if (device_count <= 0)
{
- throw std::runtime_error("Unable to find TRIV2 NPU device");
+ throw std::runtime_error("Unable to find TRIX NPU device");
}
- // Use NPU 0 device
- if (getNPUdeviceByType(&_dev_handle, NPUCOND_TRIV2_CONN_SOCIP, 0) < 0)
+ for (int i = 0; i < device_count; i++)
{
- throw std::runtime_error("Failed to get TRIV2 NPU device handle");
+ npudev_h h;
+ if (getNPUdeviceByType(&h, NPUCOND_TRIV2_CONN_SOCIP, i) < 0)
+ {
+ throw std::runtime_error("Failed to get TRIX NPU device handle");
+ }
+ _dev_handles.push_back(h);
}
}
~DevContext()
{
- if (_dev_handle != nullptr)
+ for (auto h : _dev_handles)
{
- unregisterNPUmodel_all(_dev_handle);
- putNPUdevice(_dev_handle);
+ if (h != nullptr)
+ {
+ unregisterNPUmodel_all(h);
+ putNPUdevice(h);
+ }
}
}
- npudev_h getDev() { return _dev_handle; }
+ npudev_h getDev(int i) { return _dev_handles[i]; }
+ int getDevSize() { return _dev_handles.size(); }
template <typename T> void setDataInfo(tensors_data_info *info, std::vector<T *> &tensors)
{
@@ -66,14 +80,15 @@ public:
}
}
- template <typename T> void setBuffer(generic_buffers *buf, std::vector<T *> &tensors)
+ template <typename T>
+ void setBuffer(generic_buffers *buf, std::vector<T *> &tensors, int batch_size, int batch_index)
{
buf->num_buffers = static_cast<uint32_t>(tensors.size());
for (uint32_t idx = 0; idx < buf->num_buffers; ++idx)
{
- buf->bufs[idx].addr = tensors[idx]->buffer();
- buf->bufs[idx].size = static_cast<uint64_t>(tensors[idx]->total_size());
+ buf->bufs[idx].size = static_cast<uint64_t>(tensors[idx]->total_size() / batch_size);
+ buf->bufs[idx].addr = tensors[idx]->buffer() + (batch_index * buf->bufs[idx].size);
buf->bufs[idx].type = BUFFER_MAPPED;
}
}
@@ -106,9 +121,8 @@ private:
}
private:
- // NPU device handle
- // TODO Support multicore npu device
- npudev_h _dev_handle;
+ // NPU device handles
+ std::vector<npudev_h> _dev_handles;
};
} // namespace trix
diff --git a/runtime/onert/backend/trix/ops/BulkLayer.cc b/runtime/onert/backend/trix/ops/BulkLayer.cc
index 71fdf3f0d..3c49da9a3 100644
--- a/runtime/onert/backend/trix/ops/BulkLayer.cc
+++ b/runtime/onert/backend/trix/ops/BulkLayer.cc
@@ -18,6 +18,7 @@
#include <util/logging.h>
#include <libnpuhost.h>
+#include <future>
namespace onert
{
@@ -49,24 +50,56 @@ void BulkLayer::configure(const std::vector<const IPortableTensor *> &inputs,
throw std::runtime_error("Unable to extract the model metadata");
}
+ _model_id.resize(_dev_context->getDevSize());
+
generic_buffer model_file;
model_file.type = BUFFER_FILE;
model_file.filepath = binary_path.c_str();
model_file.size = _meta->size;
- if (registerNPUmodel(dev_context->getDev(), &model_file, &_model_id) < 0)
+ for (int i = 0; i < _dev_context->getDevSize(); i++)
+ {
+ if (registerNPUmodel(dev_context->getDev(i), &model_file, &_model_id[i]) < 0)
+ {
+ throw std::runtime_error("Failed to register npu model");
+ }
+ }
+}
+
+void single_job(npudev_h dev, int req_id, input_buffers *input_buf, tensors_data_info *in_info,
+ output_buffers *output_buf, tensors_data_info *out_info)
+{
+ if (setNPU_requestData(dev, req_id, input_buf, in_info, output_buf, out_info))
+ {
+ throw std::runtime_error("Unable to create NPU request for req_id (" + std::to_string(req_id) +
+ ")");
+ }
+
+ if (submitNPU_request(dev, req_id))
{
- throw std::runtime_error("Failed to register npu model");
+ throw std::runtime_error("Unable to submit NPU request with req id (" + std::to_string(req_id) +
+ ")");
}
}
void BulkLayer::run()
{
- int req_id;
- if (createNPU_request(_dev_context->getDev(), _model_id, &req_id))
+ // TODO: Remove too many assumptions
+ // We assume the user wants batch execution if the user's input size is a multiple of the model's input size
+ int user_input_batch = (_inputs[0]->get_info().shape()).dim(0);
+ int model_input_batch = _meta->input_seg_dims[0][0];
+ int batch_size = user_input_batch / model_input_batch;
+ bool is_batch_execution = (batch_size != 1 ? true : false);
+
+ std::vector<int> req_id(_dev_context->getDevSize());
+
+ for (int i = 0; i < _dev_context->getDevSize(); i++)
{
- throw std::runtime_error("Unable to create NPU request with model id (" +
- std::to_string(_model_id) + ")");
+ if (createNPU_request(_dev_context->getDev(i), _model_id[i], &req_id[i]))
+ {
+ throw std::runtime_error("Unable to create NPU request with model id (" +
+ std::to_string(_model_id[i]) + ")");
+ }
}
if (_meta->input_seg_num != _inputs.size())
@@ -84,28 +117,58 @@ void BulkLayer::run()
_dev_context->setDataInfo<const IPortableTensor>(&in_info, _inputs);
_dev_context->setDataInfo<IPortableTensor>(&out_info, _outputs);
- input_buffers input_buf;
- output_buffers output_buf;
- _dev_context->setBuffer<const IPortableTensor>(&input_buf, _inputs);
- _dev_context->setBuffer<IPortableTensor>(&output_buf, _outputs);
+ std::vector<input_buffers> input_buf;
+ std::vector<output_buffers> output_buf;
+ input_buf.resize(_dev_context->getDevSize());
+ output_buf.resize(_dev_context->getDevSize());
+
+ std::vector<std::future<void>> f(_dev_context->getDevSize());
- if (setNPU_requestData(_dev_context->getDev(), req_id, &input_buf, &in_info, &output_buf,
- &out_info))
+ const int num_cores = _dev_context->getDevSize();
+ if (is_batch_execution)
{
- throw std::runtime_error("Unable to create NPU request for model id (" +
- std::to_string(_model_id) + ")");
+ // TODO: Support a general number of cores (>2)
+ // Here we assume that there are 2 trix cores
+ for (int i = 0; i < (batch_size); i = i + num_cores)
+ {
+ for (int core = 0; core < num_cores; core++)
+ {
+ _dev_context->setBuffer<const IPortableTensor>(&input_buf[core], _inputs, batch_size,
+ i + core);
+ _dev_context->setBuffer<IPortableTensor>(&output_buf[core], _outputs, batch_size, i + core);
+ }
+ for (int core = 0; core < num_cores; core++)
+ {
+
+ if (i + core < batch_size)
+ {
+ f[core] =
+ std::async(std::launch::async, &single_job, _dev_context->getDev(core), req_id[core],
+ &input_buf[core], &in_info, &output_buf[core], &out_info);
+ }
+ }
+ for (int core = 0; core < num_cores; core++)
+ {
+ f[core].wait();
+ }
+ }
}
-
- if (submitNPU_request(_dev_context->getDev(), req_id))
+ else
{
- throw std::runtime_error("Unable to submit NPU request with req id (" + std::to_string(req_id) +
- ")");
+ _dev_context->setBuffer<const IPortableTensor>(&input_buf[0], _inputs, batch_size, 0);
+ _dev_context->setBuffer<IPortableTensor>(&output_buf[0], _outputs, batch_size, 0);
+
+ single_job(_dev_context->getDev(0), req_id[0], &input_buf[0], &in_info, &output_buf[0],
+ &out_info);
}
- if (removeNPU_request(_dev_context->getDev(), req_id))
+ for (int i = 0; i < _dev_context->getDevSize(); i++)
{
- throw std::runtime_error("Unable to remove NPU request with req id (" + std::to_string(req_id) +
- ")");
+ if (removeNPU_request(_dev_context->getDev(i), req_id[i]))
+ {
+ throw std::runtime_error("Unable to remove NPU request with req id (" +
+ std::to_string(req_id[i]) + ")");
+ }
}
}
diff --git a/runtime/onert/backend/trix/ops/BulkLayer.h b/runtime/onert/backend/trix/ops/BulkLayer.h
index f7080ccad..614c0f728 100644
--- a/runtime/onert/backend/trix/ops/BulkLayer.h
+++ b/runtime/onert/backend/trix/ops/BulkLayer.h
@@ -50,7 +50,7 @@ private:
std::vector<const IPortableTensor *> _inputs;
std::vector<IPortableTensor *> _outputs;
- uint32_t _model_id;
+ std::vector<uint32_t> _model_id;
npubin_meta *_meta;
std::shared_ptr<DevContext> _dev_context;
};
diff --git a/runtime/onert/backend/xnnpack/KernelGenerator.cc b/runtime/onert/backend/xnnpack/KernelGenerator.cc
index 28f729d77..9580bec8c 100644
--- a/runtime/onert/backend/xnnpack/KernelGenerator.cc
+++ b/runtime/onert/backend/xnnpack/KernelGenerator.cc
@@ -56,17 +56,13 @@ std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationI
assert(_tensor_builder->dynamicTensorManager());
assert(_tensor_reg);
- auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
-
// Prepare to handle dynamic tensors later
auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
{
- dyn_ctx->op_ind = ind;
- dyn_ctx->operations = &_operations_ctx;
- dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
-
- ret->dynamic_tensor_ctx(dyn_ctx);
+ dyn_ctx->op = &_operations_ctx.at(ind);
+ dyn_ctx->dynamic_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
}
+ ret->dynamic_tensor_ctx(dyn_ctx);
auto &op = _graph.operations().at(ind);
op.accept(*this);
diff --git a/runtime/onert/core/CMakeLists.txt b/runtime/onert/core/CMakeLists.txt
index 6dbadf80b..87c7a13e4 100644
--- a/runtime/onert/core/CMakeLists.txt
+++ b/runtime/onert/core/CMakeLists.txt
@@ -6,14 +6,18 @@ nnfw_find_package(Ruy REQUIRED)
add_library(onert_core SHARED ${SOURCES})
set_target_properties(onert_core PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+# NOTE
+# We publish public headers into developer package.
+# To avoid mistake using private header in public header, do not define
+# private target_include_directories scope for src/ directory.
target_include_directories(onert_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_include_directories(onert_core PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src)
-target_link_libraries(onert_core PUBLIC nnfw_lib_misc half)
-target_link_libraries(onert_core PRIVATE nnfw_lib_cker)
+
+target_link_libraries(onert_core PRIVATE jsoncpp half)
+target_link_libraries(onert_core PRIVATE nnfw_lib_misc nnfw_lib_cker)
target_link_libraries(onert_core PRIVATE nnfw_common)
target_link_libraries(onert_core PRIVATE nnfw_coverage)
target_link_libraries(onert_core PRIVATE dl ${LIB_PTHREAD})
-target_link_libraries(onert_core PRIVATE jsoncpp)
target_link_libraries(onert_core PRIVATE ruy)
target_link_libraries(onert_core INTERFACE ruy_instrumentation)
@@ -48,6 +52,8 @@ set(TEST_ONERT_CORE test_onert_core)
add_executable(${TEST_ONERT_CORE} ${TESTS})
target_link_libraries(${TEST_ONERT_CORE} onert_core)
+# Requires linking nnfw_coverage: check header coverage
+target_link_libraries(${TEST_ONERT_CORE} nnfw_coverage)
target_link_libraries(${TEST_ONERT_CORE} gtest gtest_main dl ${LIB_PTHREAD})
add_test(${TEST_ONERT_CORE} ${TEST_ONERT_CORE})
diff --git a/runtime/onert/core/include/backend/ITensor.h b/runtime/onert/core/include/backend/ITensor.h
index 0a4d9c814..560416264 100644
--- a/runtime/onert/core/include/backend/ITensor.h
+++ b/runtime/onert/core/include/backend/ITensor.h
@@ -20,6 +20,7 @@
#include <cstring>
#include <cstdint>
#include <functional>
+#include <stdexcept>
#include "ir/DataType.h"
#include "ir/Layout.h"
diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h
index 58bfe3406..cf2da4c34 100644
--- a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h
+++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h
@@ -103,7 +103,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
// 1. Scan DEF of outputs. If the DEF, allocate it
// 2. Scan DEF of inputs. If variable tensor, allocate it
// 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
- for (const auto op_ind : order)
+ for (const auto &op_ind : order)
{
const auto &op = graph.operations().at(op_ind);
auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
@@ -161,7 +161,7 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
}
}
- for (auto ind : operands_last_until_end)
+ for (auto &ind : operands_last_until_end)
{
tensor_builder->notifyLastUse(ind);
}
diff --git a/runtime/onert/core/include/compiler/BackendManager.h b/runtime/onert/core/include/compiler/BackendManager.h
index befe40022..b44fcf836 100644
--- a/runtime/onert/core/include/compiler/BackendManager.h
+++ b/runtime/onert/core/include/compiler/BackendManager.h
@@ -17,12 +17,11 @@
#ifndef __ONERT_COMPILER_BACKEND_MANAGER_H__
#define __ONERT_COMPILER_BACKEND_MANAGER_H__
-#include <memory>
-#include <map>
-
-#include "ir/Operands.h"
#include "backend/Backend.h"
-#include "backend/builtin/Backend.h"
+#include "ir/Operands.h"
+
+#include <map>
+#include <memory>
namespace onert
{
@@ -41,7 +40,7 @@ public:
public:
backend::Backend *get(const std::string &key);
const backend::Backend *get(const std::string &key) const;
- const backend::builtin::Backend *getBuiltin() const;
+ const backend::Backend *getBuiltin() const;
const std::vector<const backend::Backend *> getAll() const
{
std::vector<const backend::Backend *> v;
@@ -65,7 +64,7 @@ private:
private:
std::map<std::string, std::unique_ptr<void, dlhandle_destroy_t>> _handle_map;
std::map<std::string, std::unique_ptr<backend::Backend, backend_destroy_t>> _gen_map;
- backend::builtin::Backend *_builtin{nullptr};
+ backend::Backend *_builtin{nullptr};
/**
* @brief load builtin backend
*
diff --git a/runtime/onert/core/include/compiler/Compiler.h b/runtime/onert/core/include/compiler/Compiler.h
index 292de4b12..f05d63c66 100644
--- a/runtime/onert/core/include/compiler/Compiler.h
+++ b/runtime/onert/core/include/compiler/Compiler.h
@@ -22,8 +22,8 @@
#ifndef __ONERT_COMPILER_COMPILE_H_
#define __ONERT_COMPILER_COMPILE_H_
-#include "ir/Graph.h"
-#include "exec/IExecutor.h"
+#include "ir/NNPkg.h"
+#include "exec/Executors.h"
#include "util/TracingCtx.h"
namespace onert
@@ -40,6 +40,10 @@ enum class State
struct ManualSchedulerOptions
{
+public:
+ void setBackendMap(const std::string &str);
+
+public:
std::string backend_for_all;
std::unordered_map<ir::OpCode, std::string> opcode_to_backend;
std::unordered_map<ir::OperationIndex, std::string> index_to_backend;
@@ -50,8 +54,14 @@ struct PartialGraphOptions
std::unordered_map<ir::OperationIndex, ir::SubgraphIndex> index_to_graph;
};
-struct CompilerOptions
+class CompilerOptions
{
+public:
+ // Set default values for CompilerOptions
+ // All these default values should not be fetched from Env, when we stop supporting Android NNAPI.
+ static std::unique_ptr<CompilerOptions> fromGlobalConfig();
+
+public:
// GENERAL OPTIONS
std::vector<std::string> backend_list;
@@ -65,75 +75,85 @@ struct CompilerOptions
bool disable_compile; //< Run with Interpreter if true, try compilation otherwise
bool fp16_enable; //< Whether fp16 mode ON/OFF
PartialGraphOptions partial_graph_options;
-
- util::TracingCtx *tracing_ctx; //< Profiling information
};
-CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs);
+struct CompilerArtifact
+{
+ CompilerArtifact(void) = delete;
+ CompilerArtifact(std::shared_ptr<exec::Executors> executors,
+ std::unique_ptr<const util::TracingCtx> tracing_ctx)
+ : _executors{executors}, _tracing_ctx{std::move(tracing_ctx)} {};
+
+ std::shared_ptr<exec::Executors> _executors;
+ std::unique_ptr<const util::TracingCtx> _tracing_ctx;
+};
/**
- * @brief Class to compile graph model
+ * @brief Class to compile NN package
*/
class Compiler
{
public:
/**
- * @brief Construct a new Compiler object
- * @param[in] subgs All subgraphs of a model
- * @param[in] tracing_ctx Profiling information
+ * @brief Construct a new Compiler object for single model
+ * @param[in] model model to compile
+   * @param[in] copt Compiler Options
+ */
+ Compiler(const std::shared_ptr<ir::Model> &model, CompilerOptions &copt);
+
+ /**
+ * @brief Construct a new Compiler object for NN package
+ * @param[in] nnpkg NN package to compile
+   * @param[in] copts Compiler option vector for each model in package
*/
- Compiler(const std::shared_ptr<ir::Subgraphs> &subgs, util::TracingCtx *tracing_ctx);
+ Compiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts);
public:
/**
* @brief Do compilation with the options
*
- * @return std::shared_ptr<exec::ExecutorMap> Executors as a result of compilation
+ * @return std::shared_ptr<CompilerArtifact> Executors as a result of compilation
*/
- std::shared_ptr<exec::ExecutorMap> compile(void);
+ std::shared_ptr<CompilerArtifact> compile(void);
/**
* @brief Do compilation with the options
*
- * @return std::vector<std::shared_ptr<exec::ExecutorMap>> Executors as a result of compilation
+ * @return std::vector<std::shared_ptr<CompilerArtifact>> Executors as a result of compilation
* for pipeline
*/
- std::vector<std::shared_ptr<exec::ExecutorMap>> compile(const char *package_file_path,
- const char *map_file_path);
+ std::vector<std::shared_ptr<CompilerArtifact>> compile(const char *package_file_path,
+ const char *map_file_path);
State state(void) const { return _state; }
- CompilerOptions &options() { return _options; }
-
/**
* @brief Allow to compute float32 using float16 data type
*/
void enableToFp16();
/**
- * @brief Set backends from string-encoded mappings from operation index to backend type (cpu,
- * acl_cl)
- */
- void set_backend_from_str(const char *backend_settings);
-
- /**
* @brief Build the partial graphs to compile with original graph
*/
bool buildPartialGraph(uint32_t num_graphs);
private:
void checkProfilerConditions();
- std::shared_ptr<ir::Graph> &primary_subgraph() { return _subgraphs->at(ir::SubgraphIndex{0}); }
+ std::shared_ptr<ir::Graph> &primary_subgraph()
+ {
+ return _nnpkg->primary_model()->at(ir::SubgraphIndex{0});
+ }
private:
- std::shared_ptr<ir::Subgraphs> _subgraphs;
+ std::shared_ptr<ir::NNPkg> _nnpkg;
// NOTE These executors does not have duplicated subgraph. This mean they do not allow support
// subgraphs being called recursively because data of non-constant tensor of parent executor will
// be updated by child executor. If you want to support subgraphs being called recursively, you
// have to add allocate non-constant tensor memory of executors in execution time when each
// subgraph is called.
State _state;
- CompilerOptions _options;
+ std::vector<CompilerOptions *> _voptions;
};
} // namespace compiler
diff --git a/runtime/onert/core/include/compiler/LoweredGraph.h b/runtime/onert/core/include/compiler/LoweredGraph.h
index 10ca8e9fc..7264f2a10 100644
--- a/runtime/onert/core/include/compiler/LoweredGraph.h
+++ b/runtime/onert/core/include/compiler/LoweredGraph.h
@@ -60,9 +60,14 @@ public:
private:
void makeLowerInfo(const compiler::BackendResolver &backend_resolver);
void dumpLowerInfo();
- void lowerGraph(const ir::Graph &graph, const compiler::CompilerOptions &options);
+ void lowerGraph(const compiler::CompilerOptions &options);
private:
+ /**
+ * @brief Copy of target graph for lowering
+ * @note It uses copy of graph, not reference.
+ * It allows the original graph can be compiled multiple times.
+ */
ir::Graph _graph;
ir::Graph _parent_graph;
std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks;
diff --git a/runtime/onert/core/include/compiler/StaticShapeInferer.h b/runtime/onert/core/include/compiler/StaticShapeInferer.h
index b2272e262..f701dc207 100644
--- a/runtime/onert/core/include/compiler/StaticShapeInferer.h
+++ b/runtime/onert/core/include/compiler/StaticShapeInferer.h
@@ -28,6 +28,36 @@ namespace onert
{
namespace compiler
{
+/**
+ * @brief Class that observe and update operands.
+ */
+class OperandObserver
+{
+public:
+ /**
+ * @brief Constructor of OperandObserver
+ *
+ * @param operands Operands to be updated
+ */
+ OperandObserver(const std::vector<ir::Operand *> &operands) : _operands{operands} {}
+ /**
+ * @brief Destructor of OperandObserver
+ */
+ virtual ~OperandObserver() = default;
+
+public:
+ /**
+ * @brief Update Shape and some OperandInfo of operands
+ *
+   * @param changed_operands_info Operand info to apply to the observed operands
+ * @param unpredictable Whether runtime can predict shapes of operands in compilation time
+ */
+ void updateShapes(const std::vector<ir::OperandInfo> &changed_operands_info,
+ bool unpredictable = false);
+
+private:
+ std::vector<ir::Operand *> _operands;
+};
/**
* @brief Class to infer shape before running kernels. It does the following:
@@ -38,32 +68,42 @@ namespace compiler
class StaticShapeInferer : public ir::OperationVisitor
{
public:
- StaticShapeInferer(
- const ir::SubgraphIndex &subg_idx,
- const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
- &lowered_subgs)
- : _lowered_subgs(lowered_subgs), _operands(lowered_subgs.at(subg_idx)->graph().operands()),
- _operations(lowered_subgs.at(subg_idx)->graph().operations()),
- _return_has_dynamic_tensor(false)
- { /* empty */
+ StaticShapeInferer(compiler::LoweredGraph *lowered_subg)
+ : _lowered_subg{lowered_subg}, _subg_input_observers{}, _controlflow_output_observer{nullptr},
+ _child_inferers{}
+ {
}
virtual ~StaticShapeInferer() = default;
public:
+ void appendSubgInputObserver(const ir::SubgraphIndex &subg_idx,
+ std::unique_ptr<OperandObserver> &&subg_input_observer) noexcept
+ {
+ _subg_input_observers[subg_idx] = std::move(subg_input_observer);
+ }
+
+ void setControlflowOutputObserver(std::unique_ptr<OperandObserver> &&output_observer) noexcept
+ {
+ _controlflow_output_observer = std::move(output_observer);
+ }
+
+ void appendChildInferer(const ir::SubgraphIndex &subg_idx, compiler::StaticShapeInferer *inferer)
+ {
+ _child_inferers[subg_idx] = inferer;
+ }
+
/**
- * @brief Infer shape of operands beloning to ops and set the output shape.
+ * @brief Infer shape of operands belonging to ops and set the output shape.
* If output shape cannot be known without running op, mark it so that it can be allocated
* when running kernel.
- * @param op Operation
- * @return @c true if op's input or output has any dynamic tensor; @c false otherwise.
*/
- bool infer(const ir::Operation &op);
+ void infer(void);
void dump();
private:
- void inferSubgraph(ir::SubgraphIndex subg_ind);
bool checkDynamicInput(const ir::Operation &op);
+ bool checkDynamicOutput(const ir::Operation &op);
void setDynamicOutput(const ir::Operation &op);
private:
@@ -113,6 +153,7 @@ private:
void visit(const ir::operation::Unpack &op) override;
void visit(const ir::operation::While &op) override;
void visit(const ir::operation::DetectionPostProcess &op) override;
+ void visit(const ir::operation::Bulk &op) override;
private:
/**
@@ -128,12 +169,11 @@ private:
void handleSimpleUnaryOp(const ir::Operation &op, const ir::OperandIndex input_idx);
private:
- const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
- &_lowered_subgs;
- // _operands and _operations can be changed by controlflow operation
- ir::Operands &_operands; // operands of current subgraph
- ir::Operations &_operations; // operations of current subgraph
- bool _return_has_dynamic_tensor;
+ compiler::LoweredGraph *_lowered_subg;
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<OperandObserver>>
+ _subg_input_observers; // child subg input
+ std::unique_ptr<OperandObserver> _controlflow_output_observer; // parent controlflow op output
+ std::unordered_map<ir::SubgraphIndex, compiler::StaticShapeInferer *> _child_inferers;
};
} // namespace compiler
diff --git a/runtime/onert/core/include/exec/Execution.h b/runtime/onert/core/include/exec/Execution.h
index b0a5cced3..1e8083c4c 100644
--- a/runtime/onert/core/include/exec/Execution.h
+++ b/runtime/onert/core/include/exec/Execution.h
@@ -22,7 +22,7 @@
#define __ONERT_EXEC_EXECUTION_H__
#include "ir/Layout.h"
-#include "exec/IExecutor.h"
+#include "exec/Executors.h"
#include "IODescription.h"
#include <thread>
@@ -46,7 +46,7 @@ public:
* @brief Construct a new Execution object
* @param[in] executor Model executor
*/
- Execution(const std::shared_ptr<ExecutorMap> &executors);
+ Execution(const std::shared_ptr<Executors> &executors);
public:
/**
@@ -250,7 +250,7 @@ private:
std::unique_ptr<IExecutor> &primary_executor() { return _executors->at(ir::SubgraphIndex{0}); };
private:
- const std::shared_ptr<ExecutorMap> _executors;
+ const std::shared_ptr<Executors> _executors;
IODescription _io_desc;
std::deque<std::pair<IODescription *, uint32_t>> _async_io_descs;
sem_t _async_io_descs_sem;
diff --git a/runtime/onert/core/include/exec/Executors.h b/runtime/onert/core/include/exec/Executors.h
new file mode 100644
index 000000000..5adb0eda4
--- /dev/null
+++ b/runtime/onert/core/include/exec/Executors.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_EXEC_EXECUTORS_H__
+#define __ONERT_EXEC_EXECUTORS_H__
+
+#include "IExecutor.h"
+#include "ir/NNPkg.h"
+
+namespace onert
+{
+namespace exec
+{
+
+/**
+ * @brief Class to gather executors
+ */
+class Executors
+{
+public:
+ Executors(void) = default;
+ Executors(std::unique_ptr<ir::ModelEdges> model_edges) { _model_edges = std::move(model_edges); }
+ Executors(const Executors &) = delete;
+ Executors(Executors &&) = default;
+
+ // TODO Use Executor index
+ void emplace(ir::SubgraphIndex idx, std::unique_ptr<IExecutor> exec)
+ {
+ _executors.emplace(idx, std::move(exec));
+ }
+
+ std::unique_ptr<IExecutor> &at(ir::SubgraphIndex idx) { return _executors.at(idx); }
+
+ uint32_t inputSize() const;
+
+ uint32_t outputSize() const;
+
+ const ir::OperandInfo inputInfo(const ir::IOIndex &index);
+
+ const ir::OperandInfo outputInfo(const ir::IOIndex &index);
+
+ void execute(const IODescription &desc);
+
+private:
+ void executeEntries(const IODescription &desc);
+
+private:
+ // TODO Use Executor index
+  // Changing index will affect if/while compile and kernel implementation
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>> _executors;
+ // NOTE _model_edges may use different struct type for executor implementation
+ std::unique_ptr<ir::ModelEdges> _model_edges;
+};
+
+} // namespace exec
+} // namespace onert
+
+#endif // __ONERT_EXEC_EXECUTORS_H__
diff --git a/runtime/onert/core/include/exec/FunctionSequence.h b/runtime/onert/core/include/exec/FunctionSequence.h
index cf3f2a882..7ff6d8b8c 100644
--- a/runtime/onert/core/include/exec/FunctionSequence.h
+++ b/runtime/onert/core/include/exec/FunctionSequence.h
@@ -75,8 +75,7 @@ public:
public: // methods related to dynamic tensor
struct DynamicTensorCtx
{
- ir::OperationIndex op_ind;
- const ir::Operations *operations = nullptr;
+ const ir::Operation *op = nullptr;
std::shared_ptr<exec::DynamicShapeInferer> dynamic_shape_inferer = nullptr;
};
diff --git a/runtime/onert/core/include/exec/IExecutor.h b/runtime/onert/core/include/exec/IExecutor.h
index adc68074f..bb5b5af98 100644
--- a/runtime/onert/core/include/exec/IExecutor.h
+++ b/runtime/onert/core/include/exec/IExecutor.h
@@ -107,8 +107,6 @@ struct IExecutor
virtual const std::vector<backend::builtin::IOTensor *> &getOutputTensors() const = 0;
};
-using ExecutorMap = std::unordered_map<ir::SubgraphIndex, std::unique_ptr<IExecutor>>;
-
} // namespace exec
} // namespace onert
diff --git a/runtime/onert/core/include/ir/Graph.h b/runtime/onert/core/include/ir/Graph.h
index 7a7688334..286caf72f 100644
--- a/runtime/onert/core/include/ir/Graph.h
+++ b/runtime/onert/core/include/ir/Graph.h
@@ -20,9 +20,9 @@
#include <functional>
#include <unordered_map>
+#include "ir/Model.h"
#include "ir/Operands.h"
#include "ir/Operations.h"
-#include "ir/Subgraphs.h"
namespace onert
{
@@ -50,7 +50,9 @@ private:
};
public:
- Graph(void);
+ explicit Graph(void);
+ explicit Graph(const Graph &);
+
~Graph(void);
// Graph Building
@@ -87,10 +89,9 @@ public:
void verify(void);
void removeOperand(const OperandIndex &ind) { _operands.remove(ind); }
void setLayout(Layout layout) { _layout = layout; }
- void setSubgraphs(const std::shared_ptr<Subgraphs> &subgs) { _subgraphs = subgs; }
- void setPartialgraphs(const std::shared_ptr<Subgraphs> &partialgraphs)
+ void setPartialModel(const std::shared_ptr<Model> &partial_model)
{
- _partialgraphs = partialgraphs;
+ _partialgraphs = partial_model;
}
void
setTensorName(std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> &tensor_names)
@@ -134,27 +135,25 @@ public:
Operands &operands() { return _operands; } // TODO Remove this non-const accessor
const Operations &operations() const { return _operations; }
Operations &operations() { return _operations; }
- const std::shared_ptr<Subgraphs> &subgraphs() const { return _subgraphs; }
- std::shared_ptr<Subgraphs> &subgraphs() { return _subgraphs; }
Layout layout() const { return _layout; }
- std::shared_ptr<Subgraphs> &partialgraphs() { return _partialgraphs; }
+ std::shared_ptr<Model> &partialgraphs() { return _partialgraphs; }
std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> &tensor_names()
{
return _tensor_names;
}
- std::unordered_map<std::string, IOIndex>::iterator _name_to_input_begin()
+ std::unordered_map<std::string, IOIndex>::const_iterator _name_to_input_begin() const
{
return _name_to_input.begin();
}
- std::unordered_map<std::string, IOIndex>::iterator _name_to_input_end()
+ std::unordered_map<std::string, IOIndex>::const_iterator _name_to_input_end() const
{
return _name_to_input.end();
}
- std::unordered_map<std::string, IOIndex>::iterator _name_to_output_begin()
+ std::unordered_map<std::string, IOIndex>::const_iterator _name_to_output_begin() const
{
return _name_to_output.begin();
}
- std::unordered_map<std::string, IOIndex>::iterator _name_to_output_end()
+ std::unordered_map<std::string, IOIndex>::const_iterator _name_to_output_end() const
{
return _name_to_output.end();
}
@@ -172,13 +171,11 @@ private:
OperandIndexSequence _outputs;
std::unordered_map<std::string, IOIndex> _name_to_input;
std::unordered_map<std::string, IOIndex> _name_to_output;
- // Child subgraphs
- std::shared_ptr<Subgraphs> _subgraphs;
// TFLite and circle's default layout is NHWC;
Layout _layout{Layout::NHWC};
- // Partial Graphs
- std::shared_ptr<ir::Subgraphs> _partialgraphs;
+ // model for partial graphs
+ std::shared_ptr<ir::Model> _partialgraphs;
std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> _tensor_names;
};
diff --git a/runtime/onert/core/include/ir/Index.h b/runtime/onert/core/include/ir/Index.h
index e01b090f3..f01a4c84d 100644
--- a/runtime/onert/core/include/ir/Index.h
+++ b/runtime/onert/core/include/ir/Index.h
@@ -38,6 +38,9 @@ using IOIndex = ::onert::util::Index<uint32_t, IOIndexTag>;
struct SubgraphIndexTag;
using SubgraphIndex = ::onert::util::Index<uint32_t, SubgraphIndexTag>;
+struct ModelIndexTag;
+using ModelIndex = ::onert::util::Index<uint32_t, ModelIndexTag>;
+
template <typename IndexType>
std::ostream &_index_print_impl(std::ostream &o, const std::string &prefix, IndexType index)
{
@@ -64,7 +67,12 @@ inline std::ostream &operator<<(std::ostream &o, const IOIndex &i)
inline std::ostream &operator<<(std::ostream &o, const SubgraphIndex &i)
{
- return _index_print_impl(o, "SUBGRAPH", i); // $ubgraph
+ return _index_print_impl(o, "SUBGRAPH", i);
+}
+
+inline std::ostream &operator<<(std::ostream &o, const ModelIndex &i)
+{
+ return _index_print_impl(o, "MODEL", i);
}
} // namespace ir
diff --git a/runtime/onert/core/include/ir/Layout.h b/runtime/onert/core/include/ir/Layout.h
index 082810172..0cdbcc2c8 100644
--- a/runtime/onert/core/include/ir/Layout.h
+++ b/runtime/onert/core/include/ir/Layout.h
@@ -18,6 +18,7 @@
#define __ONERT_IR_LAYOUT_H__
#include <functional>
+#include <stdexcept>
#include <string>
namespace onert
diff --git a/runtime/onert/core/include/ir/Model.h b/runtime/onert/core/include/ir/Model.h
new file mode 100644
index 000000000..c3c0d87b8
--- /dev/null
+++ b/runtime/onert/core/include/ir/Model.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_MODEL_H__
+#define __ONERT_IR_MODEL_H__
+
+#include <memory>
+#include <unordered_map>
+
+#include "ir/Index.h"
+#include "util/ObjectManager.h"
+
+namespace onert
+{
+namespace ir
+{
+
+class Graph;
+
+class Model
+{
+public:
+ Model() = default;
+ Model(const Model &obj) = default;
+ Model(Model &&) = default;
+ Model &operator=(const Model &) = default;
+ Model &operator=(Model &&) = default;
+ ~Model() = default;
+
+ /**
+ * @brief Put subgraph in the container with a new Index for that
+ *
+   * @param[in] index Index of subgraph to be pushed
+   * @param[in] subg Subgraph to be pushed
+   * @return N/A
+ */
+ void push(SubgraphIndex index, const std::shared_ptr<Graph> &subg) { _subgraphs[index] = subg; }
+
+ /**
+ * @brief Remove the subgraph that is associated with the given index
+ *
+ * @param[in] index Index of the subgraph to be removed
+ * @return N/A
+ */
+ void remove(const SubgraphIndex &index) { _subgraphs.erase(index); }
+
+ /**
+ * @brief Get the subgraph that is associated with the given index
+ *
+ * @param[in] index Index of the subgraph to be returned
+ * @return Graph
+ */
+ const std::shared_ptr<Graph> &at(const SubgraphIndex &index) const
+ {
+ return _subgraphs.at(index);
+ }
+ /**
+ * @brief Get the subgraph that is associated with the given index
+ *
+ * @param[in] index Index of the subgraph to be returned
+ * @return Graph
+ */
+ std::shared_ptr<Graph> &at(const SubgraphIndex &index) { return _subgraphs.at(index); }
+
+ /**
+   * @brief Check whether a subgraph associated with the given index exists
+   *
+   * @param[in] index Index of the subgraph to be checked
+ * @return true if such entry exists otherwise false
+ */
+ bool exist(const SubgraphIndex &index) const
+ {
+ auto it = _subgraphs.find(index);
+ return it != _subgraphs.end();
+ }
+
+ /**
+ * @brief Iterate over the container with given function
+ *
+ * @param[in] fn Function to be run for every container entry
+ * @return N/A
+ */
+ void iterate(const std::function<void(const SubgraphIndex &, const Graph &)> &fn) const
+ {
+ for (const auto &e : _subgraphs)
+ {
+ fn(e.first, *e.second);
+ }
+ }
+
+ /**
+ * @brief Iterate over the container with given function
+ *
+ * @param[in] fn Function to be run for every container entry
+ * @return N/A
+ */
+ void iterate(const std::function<void(const SubgraphIndex &, Graph &)> &fn)
+ {
+ for (const auto &e : _subgraphs)
+ {
+ fn(e.first, *e.second);
+ }
+ }
+
+ /**
+ * @brief Get count of Subgraphs
+ *
+ * @return count of Subgraphs
+ */
+ size_t subgraphs_count() const { return _subgraphs.size(); }
+
+ /**
+ * @brief Return the primary subgraph
+ *
+ * @return std::shared_ptr<Graph> Primary subgraph
+ */
+ std::shared_ptr<Graph> primary_subgraph() const { return _subgraphs.at(SubgraphIndex{0}); }
+
+private:
+ std::unordered_map<SubgraphIndex, std::shared_ptr<Graph>> _subgraphs;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_MODEL_H__
diff --git a/runtime/onert/core/include/ir/NNPkg.h b/runtime/onert/core/include/ir/NNPkg.h
new file mode 100644
index 000000000..d9f825e85
--- /dev/null
+++ b/runtime/onert/core/include/ir/NNPkg.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_NNPKG_H__
+#define __ONERT_IR_NNPKG_H__
+
+#include <memory>
+#include <unordered_set>
+#include <vector>
+
+#include "ir/Index.h"
+#include "ir/Model.h"
+
+namespace onert
+{
+namespace ir
+{
+
+using IODesc = std::tuple<ModelIndex, SubgraphIndex, IOIndex>;
+
+struct ModelEdge
+{
+ IODesc from;
+ IODesc to;
+};
+
+struct ModelEdgeEqual
+{
+ bool operator()(const onert::ir::ModelEdge &lhs, const onert::ir::ModelEdge &rhs) const
+ {
+ return lhs.from == rhs.from && lhs.to == rhs.to;
+ }
+};
+
+struct ModelEdgeHash
+{
+ size_t operator()(const ::onert::ir::ModelEdge &edge) const noexcept
+ {
+ unsigned long long h1 = (std::get<0>(edge.from).value() << 24) |
+ (std::get<1>(edge.from).value() << 16) | std::get<2>(edge.from).value();
+ unsigned long long h2 = (std::get<0>(edge.to).value() << 24) |
+ (std::get<1>(edge.to).value() << 16) | std::get<2>(edge.to).value();
+ return h1 + h2;
+ }
+};
+
+inline std::ostream &operator<<(std::ostream &o, const IODesc &od)
+{
+ o << std::get<0>(od).value() << ":" << std::get<1>(od).value() << ":" << std::get<2>(od).value();
+ return o;
+}
+
+using ModelEdgeSet = std::unordered_set<ir::ModelEdge, ir::ModelEdgeHash, ir::ModelEdgeEqual>;
+
+/**
+ * @brief Struct to gather model I/O information in multimodel NN package
+ * Model I/O will have role one of below
+ * - Package input/output
+ * - Edge's start/finish point between model
+ */
+struct ModelEdges
+{
+ std::vector<ir::IODesc> pkg_inputs;
+ std::vector<ir::IODesc> pkg_outputs;
+ ModelEdgeSet edges;
+};
+
+class NNPkg
+{
+public:
+ NNPkg() = default;
+ NNPkg(const NNPkg &obj) = default;
+ NNPkg(NNPkg &&) = default;
+ NNPkg &operator=(const NNPkg &) = default;
+ NNPkg &operator=(NNPkg &&) = default;
+ ~NNPkg() = default;
+
+ NNPkg(std::shared_ptr<Model> model) { _models[ModelIndex{0}] = model; }
+ std::shared_ptr<Model> primary_model() { return _models.at(onert::ir::ModelIndex{0}); }
+
+ /**
+ * @brief Put model at index
+ *
+   * @param[in] index Index where Model is to be pushed
+   * @param[in] model Model to be pushed
+ */
+ void push(ModelIndex index, const std::shared_ptr<Model> &model) { _models[index] = model; }
+
+ /**
+ * @brief Get the count of model
+ *
+ * @return the count of models
+ */
+ size_t model_count() const { return _models.size(); }
+
+ /**
+ * @brief Get model at index
+ *
+ * @param[in] index Index of the model to be returned
+ * @return Model at index
+ */
+ const std::shared_ptr<Model> &model(const ModelIndex &index) const { return _models.at(index); }
+ /**
+ * @brief Get model at index
+ *
+ * @param[in] index Index of the model to be returned
+ * @return Model at index
+ */
+ std::shared_ptr<Model> &model(const ModelIndex &index) { return _models.at(index); }
+
+ /**
+ * @brief Get pkg_input at index
+ *
+ * @param[in] index Index of pkg_input to be returned
+ * @return IODesc at index
+ */
+ const IODesc &input(uint32_t index) const { return _edges.pkg_inputs[index]; }
+ /**
+ * @brief Get pkg_input at index
+ *
+ * @param[in] index Index of pkg_input to be returned
+ * @return IODesc at index
+ */
+ IODesc &input(uint32_t index) { return _edges.pkg_inputs[index]; }
+ /**
+ * @brief Add input at the end
+ *
+ * @param[in] input Input IODesc to be pushed
+ */
+ void addInput(const IODesc &input) { _edges.pkg_inputs.push_back(input); }
+
+ /**
+ * @brief Get pkg_output at index
+ *
+ * @param[in] index Index of pkg_output to be returned
+ * @return IODesc at index
+ */
+ const IODesc &output(uint32_t index) const { return _edges.pkg_outputs[index]; }
+ /**
+ * @brief Get pkg_output at index
+ *
+ * @param[in] index Index of pkg_output to be returned
+ * @return IODesc at index
+ */
+ IODesc &output(uint32_t index) { return _edges.pkg_outputs[index]; }
+ /**
+ * @brief Add output at the end
+ *
+ * @param[in] output Output IODesc to be pushed
+ */
+ void addOutput(const IODesc &output) { _edges.pkg_outputs.push_back(output); }
+
+ /**
+ * @brief Add edge between models at the end
+ *
+ * @param[in] from from IODesc
+ * @param[in] to to IODesc
+ */
+ void addEdge(const IODesc &from, const IODesc &to)
+ {
+    std::cout << from << " -> " << to << std::endl; // FIXME(review): leftover debug print; header does not include <iostream>
+ _edges.edges.insert(ModelEdge{from, to});
+ }
+ /**
+ * @brief Get model edge set
+ * @return Edge set reference
+ */
+ const ModelEdges &model_edges() { return _edges; }
+
+ // TODO: Add iterate() or getter for edges
+
+private:
+ std::unordered_map<ModelIndex, std::shared_ptr<Model>> _models;
+ ModelEdges _edges;
+};
+
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_NNPKG_H__
diff --git a/runtime/onert/core/include/ir/Subgraphs.h b/runtime/onert/core/include/ir/Subgraphs.h
deleted file mode 100644
index 6cb369447..000000000
--- a/runtime/onert/core/include/ir/Subgraphs.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_IR_SUBGRAPHS_H__
-#define __ONERT_IR_SUBGRAPHS_H__
-
-#include <memory>
-#include <unordered_map>
-
-#include "ir/Index.h"
-#include "util/ObjectManager.h"
-
-namespace onert
-{
-namespace ir
-{
-
-class Graph;
-
-class Subgraphs
-{
-public:
- Subgraphs() = default;
- Subgraphs(const Subgraphs &obj) = default;
- Subgraphs(Subgraphs &&) = default;
- Subgraphs &operator=(const Subgraphs &) = default;
- Subgraphs &operator=(Subgraphs &&) = default;
- ~Subgraphs() = default;
-
- /**
- * @brief Put subgraph in the container with a new Index for that
- *
- * @param[in] subg Subgraph to be pushed
- * @param[in] index Index of subgraph to be pushed
- * @return Created
- */
- void push(SubgraphIndex index, const std::shared_ptr<Graph> &subg) { _subgraphs[index] = subg; }
-
- /**
- * @brief Remove the subgraph that is associated with the given index
- *
- * @param[in] index Index of the subgraph to be removed
- * @return N/A
- */
- void remove(const SubgraphIndex &index) { _subgraphs.erase(index); }
-
- /**
- * @brief Get the subgraph that is associated with the given index
- *
- * @param[in] index Index of the subgraph to be returned
- * @return Graph
- */
- const std::shared_ptr<Graph> &at(const SubgraphIndex &index) const
- {
- return _subgraphs.at(index);
- }
- /**
- * @brief Get the subgraph that is associated with the given index
- *
- * @param[in] index Index of the subgraph to be returned
- * @return Graph
- */
- std::shared_ptr<Graph> &at(const SubgraphIndex &index) { return _subgraphs.at(index); }
-
- /**
- * @brief Get the subgraph that is associated with the given index
- *
- * @param[in] index Index of the subgraph to be returned
- * @return true if such entry exists otherwise false
- */
- bool exist(const SubgraphIndex &index) const
- {
- auto it = _subgraphs.find(index);
- return it != _subgraphs.end();
- }
-
- /**
- * @brief Iterate over the container with given function
- *
- * @param[in] fn Function to be run for every container entry
- * @return N/A
- */
- void iterate(const std::function<void(const SubgraphIndex &, const Graph &)> &fn) const
- {
- for (const auto &e : _subgraphs)
- {
- fn(e.first, *e.second);
- }
- }
-
- /**
- * @brief Iterate over the container with given function
- *
- * @param[in] fn Function to be run for every container entry
- * @return N/A
- */
- void iterate(const std::function<void(const SubgraphIndex &, Graph &)> &fn)
- {
- for (const auto &e : _subgraphs)
- {
- fn(e.first, *e.second);
- }
- }
-
- /**
- * @brief Get count of Subgraphs
- *
- * @return count of Subgraphs
- */
- size_t count() const { return _subgraphs.size(); }
-
- /**
- * @brief Return the primary subgraph
- *
- * @return std::shared_ptr<Graph> Primary sugraph
- */
- std::shared_ptr<Graph> primary() const { return _subgraphs.at(SubgraphIndex{0}); }
-
-private:
- std::unordered_map<SubgraphIndex, std::shared_ptr<Graph>> _subgraphs;
-};
-
-} // namespace ir
-} // namespace onert
-
-#endif // __ONERT_IR_SUBGRAPHS_H__
diff --git a/runtime/onert/core/include/ir/TypeInfo.h b/runtime/onert/core/include/ir/TypeInfo.h
index 0a00da5fd..3c5062795 100644
--- a/runtime/onert/core/include/ir/TypeInfo.h
+++ b/runtime/onert/core/include/ir/TypeInfo.h
@@ -50,11 +50,7 @@ public:
public:
DataType type() const { return _type; }
- float scale() const
- {
- assert(_quant.scales.size() == 1);
- return _quant.scales[0];
- }
+ float scale() const { return _quant.scales[0]; }
const std::vector<float> &scales() const { return _quant.scales; }
int32_t zero_point() const
{
diff --git a/runtime/onert/core/include/ir/operation/Bulk.h b/runtime/onert/core/include/ir/operation/Bulk.h
index 1825f7fad..3c20f392f 100644
--- a/runtime/onert/core/include/ir/operation/Bulk.h
+++ b/runtime/onert/core/include/ir/operation/Bulk.h
@@ -32,6 +32,8 @@ public:
struct Param
{
std::string binary_path;
+ std::vector<ir::Shape> origin_input_shapes;
+ std::vector<ir::Shape> origin_output_shapes;
};
public:
diff --git a/runtime/onert/core/include/util/CalculateActivationRange.h b/runtime/onert/core/include/util/CalculateActivationRange.h
index db76f9dde..4369ca53e 100644
--- a/runtime/onert/core/include/util/CalculateActivationRange.h
+++ b/runtime/onert/core/include/util/CalculateActivationRange.h
@@ -17,6 +17,8 @@
#ifndef __ONERT_UTIL_CALCULATE_ACTIVATION_RANGE_H__
#define __ONERT_UTIL_CALCULATE_ACTIVATION_RANGE_H__
+#include <limits>
+
#include "ir/InternalType.h"
namespace onert
diff --git a/runtime/onert/core/include/util/Config.lst b/runtime/onert/core/include/util/Config.lst
index 89a9a6ac2..4bbc02ac3 100644
--- a/runtime/onert/core/include/util/Config.lst
+++ b/runtime/onert/core/include/util/Config.lst
@@ -20,7 +20,7 @@
// Name | Type | Default
CONFIG(GRAPH_DOT_DUMP , int , "0")
-CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;ruy;xnnpack;gpu_cl;bcq") // FIXME Remove bcq
+CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;ruy;xnnpack;gpu_cl;trix;bcq") // FIXME Remove bcq
CONFIG(OP_BACKEND_ALLOPS , std::string , "")
CONFIG(OP_BACKEND_MAP , std::string , "")
CONFIG(DISABLE_COMPILE , bool , "0")
diff --git a/runtime/onert/core/include/util/ConfigSource.h b/runtime/onert/core/include/util/ConfigSource.h
index da8bc8620..d53b8106d 100644
--- a/runtime/onert/core/include/util/ConfigSource.h
+++ b/runtime/onert/core/include/util/ConfigSource.h
@@ -17,17 +17,17 @@
#ifndef __ONERT_UTIL_CONFIG_SOURCE_H__
#define __ONERT_UTIL_CONFIG_SOURCE_H__
-#include <memory>
-
-#include "IConfigSource.h"
+#include <string>
+#include <unordered_map>
namespace onert
{
namespace util
{
-void config_source(std::unique_ptr<IConfigSource> &&source);
-void config_source_ext(std::unique_ptr<IConfigSource> &&source);
+using CfgKeyValues = std::unordered_map<std::string, std::string>;
+
+void setConfigKeyValues(const CfgKeyValues &keyValues);
bool toBool(const std::string &val);
int toInt(const std::string &val);
diff --git a/runtime/onert/core/include/util/EnvConfigSource.h b/runtime/onert/core/include/util/EnvConfigSource.h
deleted file mode 100644
index 8c5d0e8e9..000000000
--- a/runtime/onert/core/include/util/EnvConfigSource.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_UTIL_ENV_CONFIG_SOURCE_H__
-#define __ONERT_UTIL_ENV_CONFIG_SOURCE_H__
-
-#include <unordered_map>
-
-#include "util/GeneralConfigSource.h"
-
-namespace onert
-{
-namespace util
-{
-
-class EnvConfigSource final : public GeneralConfigSource
-{
-public:
- std::string get(const std::string &key) const override;
-
-private:
- std::unordered_map<std::string, std::string> _default_attributes;
-};
-
-} // namespace util
-} // namespace onert
-
-#endif // __ONERT_UTIL_ENV_CONFIG_SOURCE_H__
diff --git a/runtime/onert/core/include/util/GeneralConfigSource.h b/runtime/onert/core/include/util/GeneralConfigSource.h
deleted file mode 100644
index dedc820ec..000000000
--- a/runtime/onert/core/include/util/GeneralConfigSource.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_UTIL_GLOBAL_CONFIG_SOURCE_H__
-#define __ONERT_UTIL_GLOBAL_CONFIG_SOURCE_H__
-
-#include <unordered_map>
-
-#include "util/IConfigSource.h"
-
-namespace onert
-{
-namespace util
-{
-
-class GeneralConfigSource : public IConfigSource
-{
-public:
- GeneralConfigSource() = default;
-
- std::string get(const std::string &key) const override;
- void set(const std::string &key, const std::string &val);
-
-private:
- std::unordered_map<std::string, std::string> _map;
-};
-
-} // namespace util
-} // namespace onert
-
-#endif // __ONERT_UTIL_GLOBAL_CONFIG_SOURCE_H__
diff --git a/runtime/onert/core/include/util/IConfigSource.h b/runtime/onert/core/include/util/IConfigSource.h
deleted file mode 100644
index 07b09848a..000000000
--- a/runtime/onert/core/include/util/IConfigSource.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_UTIL_I_CONFIG_SOURCE_H__
-#define __ONERT_UTIL_I_CONFIG_SOURCE_H__
-
-#include <string>
-
-namespace onert
-{
-namespace util
-{
-
-struct IConfigSource
-{
- /**
- * @brief Destroy the IConfigSource object
- */
- virtual ~IConfigSource() = default;
-
- /**
- * @brief get the value for the matching key
- *
- * @param key string key to search
- * @return string value associated with the key
- */
- virtual std::string get(const std::string &key) const = 0;
-};
-
-} // namespace util
-} // namespace onert
-
-#endif // __ONERT_UTIL_I_CONFIG_SOURCE_H__
diff --git a/runtime/onert/core/include/util/ObjectManager.h b/runtime/onert/core/include/util/ObjectManager.h
index a493789fa..36b6c85c8 100644
--- a/runtime/onert/core/include/util/ObjectManager.h
+++ b/runtime/onert/core/include/util/ObjectManager.h
@@ -17,14 +17,13 @@
#ifndef __ONERT_UTIL_OBJECT_MANAGER_H__
#define __ONERT_UTIL_OBJECT_MANAGER_H__
-#include <unordered_map>
-#include <memory>
-#include <list>
-#include <functional>
+#include "util/logging.h"
+#include <cassert>
+#include <functional>
+#include <list>
#include <memory>
-
-#include "util/logging.h"
+#include <unordered_map>
namespace onert
{
@@ -208,7 +207,7 @@ public:
l.push_back(e.first);
}
- for (auto index : l)
+ for (auto &index : l)
{
fn(index, *_objects[index]);
}
diff --git a/runtime/onert/core/include/util/TracingCtx.h b/runtime/onert/core/include/util/TracingCtx.h
index 334257d87..da284d2fb 100644
--- a/runtime/onert/core/include/util/TracingCtx.h
+++ b/runtime/onert/core/include/util/TracingCtx.h
@@ -19,7 +19,7 @@
#include "ir/Graph.h"
#include "ir/Index.h"
-#include "ir/Subgraphs.h"
+#include "ir/Model.h"
#include <unordered_map>
#include <mutex>
@@ -37,29 +37,9 @@ class TracingCtx
public:
/**
* @brief Create and store unique session id managed by this class
- * Note that this constructor can be called by multiple sessions running in parallely.
- * Use this constructor only when there is only one subgraph in a model.
+ * @note This constructor can be called by multiple sessions running in parallel.
*/
- TracingCtx(const ir::Graph *primary_subgraph)
- {
- decideSessionID();
- _subgraph_indices.emplace(primary_subgraph, 0);
- }
-
- /**
- * @brief Create and store unique session id managed by this class
- * Note that this constructor can be called by multiple sessions running in parallely.
- */
- TracingCtx(const onert::ir::Subgraphs *subgraphs)
- {
- assert(subgraphs);
-
- decideSessionID();
-
- auto count = subgraphs->count();
- for (size_t i = 0; i < count; i++)
- _subgraph_indices.emplace(subgraphs->at(onert::ir::SubgraphIndex(i)).get(), i);
- }
+ TracingCtx(void) { decideSessionID(); }
uint32_t getSessionId() const { return _session_id; }
diff --git a/runtime/onert/core/src/backend/builtin/ExternalContext.h b/runtime/onert/core/src/backend/builtin/ExternalContext.h
index e67be988d..390dbb579 100644
--- a/runtime/onert/core/src/backend/builtin/ExternalContext.h
+++ b/runtime/onert/core/src/backend/builtin/ExternalContext.h
@@ -24,6 +24,8 @@
#include <ruy/ctx.h>
#include <ruy/tune.h>
+#include <memory>
+
namespace onert
{
namespace backend
diff --git a/runtime/onert/core/src/backend/builtin/KernelGenerator.cc b/runtime/onert/core/src/backend/builtin/KernelGenerator.cc
index 3d6358d9d..fa2fc0b94 100644
--- a/runtime/onert/core/src/backend/builtin/KernelGenerator.cc
+++ b/runtime/onert/core/src/backend/builtin/KernelGenerator.cc
@@ -16,12 +16,10 @@
#include "KernelGenerator.h"
-#include <backend/BackendContext.h>
-#include <util/Utils.h>
#include "kernel/IfLayer.h"
-#include "kernel/WhileLayer.h"
#include "kernel/PermuteLayer.h"
-#include "exec/ExecutorBase.h"
+#include "kernel/WhileLayer.h"
+
#include "exec/FunctionSequence.h"
namespace onert
@@ -35,12 +33,12 @@ KernelGenerator::KernelGenerator(const ir::Graph &graph, DynamicTensorManager *d
const std::shared_ptr<TensorRegistry> &tensor_reg,
const std::shared_ptr<ExternalContext> &external_context)
: basic::KernelGeneratorBase{graph}, _dyn_tensor_manager{dyn_tensor_manager},
- _tensor_reg{tensor_reg}, _tensor_registries{}, _executor_map{nullptr}, _external_context{
- external_context}
+ _tensor_reg{tensor_reg}, _tensor_registries{}, _executors{nullptr}, _external_context{
+ external_context}
{
UNUSED_RELEASE(_graph);
UNUSED_RELEASE(_tensor_registries);
- UNUSED_RELEASE(_executor_map);
+ UNUSED_RELEASE(_executors);
}
std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
@@ -48,20 +46,16 @@ std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationI
assert(_dyn_tensor_manager);
assert(_tensor_reg);
- auto dyn_shape_inferer =
- std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg);
-
auto ret = std::make_unique<exec::FunctionSequence>();
// Prepare to handle dynamic tensors later
auto dyn_ctx = std::make_shared<exec::FunctionSequence::DynamicTensorCtx>();
{
- dyn_ctx->op_ind = ind;
- dyn_ctx->operations = &_graph.operations();
- dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
-
- ret->dynamic_tensor_ctx(dyn_ctx);
+ dyn_ctx->op = &_graph.operations().at(ind);
+ dyn_ctx->dynamic_shape_inferer =
+ std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg);
}
+ ret->dynamic_tensor_ctx(dyn_ctx);
auto &op = _graph.operations().at(ind);
op.accept(*this);
@@ -90,12 +84,12 @@ void KernelGenerator::visit(const ir::operation::If &node)
output_tensors.emplace_back(output_tensor);
}
- // IfLayer just set ExecutorMap instead of then and else executor to avoid complexity of
+ // IfLayer just set Executors instead of then and else executor to avoid complexity of
// creating executor recusively
const auto cond_tensor = input_tensors.front();
input_tensors.erase(input_tensors.begin());
auto fn = std::make_unique<::onert::backend::builtin::kernel::IfLayer>(
- cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executor_map,
+ cond_tensor, input_tensors, output_tensors, then_subg_index, else_subg_index, _executors,
_external_context);
_return_fn = std::move(fn);
@@ -136,10 +130,10 @@ void KernelGenerator::visit(const ir::operation::While &node)
output_tensors.emplace_back(output_tensor);
}
- // WhileLayer just set ExecutorMap instead of cond and body executor to avoid complexity of
+ // WhileLayer just set Executors instead of cond and body executor to avoid complexity of
// creating executor recusively
auto fn = std::make_unique<::onert::backend::builtin::kernel::WhileLayer>(
- input_tensors, output_tensors, cond_subg_index, body_subg_index, _executor_map,
+ input_tensors, output_tensors, cond_subg_index, body_subg_index, _executors,
_dyn_tensor_manager->dynamic_mem_mgr().get(), _external_context);
_return_fn = std::move(fn);
diff --git a/runtime/onert/core/src/backend/builtin/KernelGenerator.h b/runtime/onert/core/src/backend/builtin/KernelGenerator.h
index 00ad962b9..d5931ca26 100644
--- a/runtime/onert/core/src/backend/builtin/KernelGenerator.h
+++ b/runtime/onert/core/src/backend/builtin/KernelGenerator.h
@@ -17,13 +17,14 @@
#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__
#define __ONERT_BACKEND_BUILTIN_KERNEL_GENERATOR_H__
-#include "exec/IExecutor.h"
+#include "DynamicTensorManager.h"
#include "ExternalContext.h"
-#include "ir/Graph.h"
-#include "TensorBuilder.h"
-#include "compiler/TensorRegistries.h"
-#include "backend/basic/KernelGeneratorBase.h"
#include "TensorRegistry.h"
+#include "../../compiler/TensorRegistries.h"
+
+#include "backend/basic/KernelGeneratorBase.h"
+#include "exec/Executors.h"
+#include "ir/Graph.h"
namespace onert
{
@@ -43,10 +44,10 @@ public:
{
_tensor_registries = tensor_registries;
}
- void setExecutorMap(const std::shared_ptr<exec::ExecutorMap> &executor_map)
+ void setExecutors(const std::shared_ptr<exec::Executors> &executors)
{
// FIXME Using shared_ptr's raw pointer!
- _executor_map = executor_map.get();
+ _executors = executors.get();
}
std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
@@ -64,7 +65,7 @@ private:
DynamicTensorManager *_dyn_tensor_manager;
std::shared_ptr<TensorRegistry> _tensor_reg;
compiler::TensorRegistries _tensor_registries;
- exec::ExecutorMap *_executor_map;
+ exec::Executors *_executors;
const std::shared_ptr<ExternalContext> _external_context;
};
diff --git a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc
index fdd9d9d14..cdb41960a 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc
+++ b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.cc
@@ -16,10 +16,6 @@
#include "IfLayer.h"
-#include <backend/ITensor.h>
-#include "exec/ExecutorBase.h"
-#include "PermuteLayer.h"
-
namespace onert
{
namespace backend
@@ -33,13 +29,13 @@ IfLayer::IfLayer(backend::IPortableTensor *cond_tensor,
const std::vector<backend::IPortableTensor *> input_tensors,
const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
- exec::ExecutorMap *executor_map,
+ exec::Executors *executors,
const std::shared_ptr<ExternalContext> &external_context)
: _cond_tensor{cond_tensor}, _input_tensors{input_tensors}, _output_tensors{output_tensors},
- _then_subg_index{then_subg_index}, _else_subg_index{else_subg_index},
- _executor_map{executor_map}, _external_context{external_context}
+ _then_subg_index{then_subg_index}, _else_subg_index{else_subg_index}, _executors{executors},
+ _external_context{external_context}
{
- // At this point, executor_map may not have executors of then subg and else subg
+ // At this point, executors may not have executors of then subg and else subg
}
void IfLayer::run()
@@ -65,12 +61,12 @@ void IfLayer::run()
if (cond_result)
{
VERBOSE(If) << "Call to $" << _then_subg_index << " (then)" << std::endl;
- subg_exec = _executor_map->at(_then_subg_index).get();
+ subg_exec = _executors->at(_then_subg_index).get();
}
else
{
VERBOSE(If) << "Call to $" << _else_subg_index << " (else)" << std::endl;
- subg_exec = _executor_map->at(_else_subg_index).get();
+ subg_exec = _executors->at(_else_subg_index).get();
}
subg_exec->execute(_input_tensors, _output_tensors);
diff --git a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h
index f12ef3605..fa5537a67 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h
+++ b/runtime/onert/core/src/backend/builtin/kernel/IfLayer.h
@@ -18,7 +18,7 @@
#define __ONERT_BACKEND_BUILTIN_KERNEL_IF_LAYER_H__
#include <backend/IPortableTensor.h>
-#include <exec/IExecutor.h>
+#include <exec/Executors.h>
#include "../ExternalContext.h"
namespace onert
@@ -37,8 +37,7 @@ public:
const std::vector<backend::IPortableTensor *> input_tensors,
const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &then_subg_index, const ir::SubgraphIndex &else_subg_index,
- exec::ExecutorMap *executor_map,
- const std::shared_ptr<ExternalContext> &external_context);
+ exec::Executors *executors, const std::shared_ptr<ExternalContext> &external_context);
public:
void run() override;
@@ -49,7 +48,7 @@ private:
const std::vector<backend::IPortableTensor *> _output_tensors;
const ir::SubgraphIndex _then_subg_index;
const ir::SubgraphIndex _else_subg_index;
- exec::ExecutorMap *_executor_map;
+ exec::Executors *_executors;
const std::shared_ptr<ExternalContext> _external_context;
};
diff --git a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
index 20cd87ad1..ddaecdf57 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
+++ b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.cc
@@ -16,9 +16,9 @@
#include "PermuteLayer.h"
-#include "exec/ShapeConverter.h"
+#include "../../../exec/ShapeConverter.h"
-#include "ruy/context.h" // from @ruy
+#include <ruy/context.h> // from @ruy
namespace onert
{
diff --git a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h
index ac5470e85..227e32434 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h
+++ b/runtime/onert/core/src/backend/builtin/kernel/PermuteLayer.h
@@ -17,10 +17,10 @@
#ifndef __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__
#define __ONERT_BACKEND_BUILTIN_KERNEL_PERMUTELAYER_H__
-#include "exec/IPermuteFunction.h"
-#include "exec/IExecutor.h"
#include "../ExternalContext.h"
-#include "ruy/thread_pool.h" // from @ruy
+#include "../../../exec/IPermuteFunction.h"
+
+#include <ruy/thread_pool.h> // from @ruy
namespace onert
{
diff --git a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc
index 81b4a6378..8e006c5ea 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc
+++ b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.cc
@@ -16,11 +16,12 @@
#include "WhileLayer.h"
-#include <algorithm>
-#include <backend/ITensor.h>
-#include "exec/ExecutorBase.h"
-#include <misc/polymorphic_downcast.h>
#include "PermuteLayer.h"
+#include "../../../exec/ExecutorBase.h"
+
+#include <misc/polymorphic_downcast.h>
+
+#include <algorithm>
namespace onert
{
@@ -34,14 +35,14 @@ namespace kernel
WhileLayer::WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &cond_subg_index,
- const ir::SubgraphIndex &body_subg_index, exec::ExecutorMap *executor_map,
+ const ir::SubgraphIndex &body_subg_index, exec::Executors *executors,
basic::DynamicMemoryManager *dyn_memory_manager,
const std::shared_ptr<ExternalContext> &external_context)
: _cond_subg_index{cond_subg_index}, _body_subg_index{body_subg_index},
- _input_tensors{input_tensors}, _output_tensors{output_tensors}, _executor_map{executor_map},
+ _input_tensors{input_tensors}, _output_tensors{output_tensors}, _executors{executors},
_dyn_memory_manager{dyn_memory_manager}, _external_context{external_context}
{
- // At this point, executor_map may not have executors of cond subg and body subg
+ // At this point, executors may not have executors of cond subg and body subg
}
void WhileLayer::run()
@@ -56,8 +57,8 @@ void WhileLayer::run()
// // Run cond subg
// If there is no loop copy "_input_tensors" -> "_dst_tensors", else copy "cond subg inputs" ->
// "_dst_tensors"
- auto cond_exec = _executor_map->at(_cond_subg_index).get();
- auto body_exec = _executor_map->at(_body_subg_index).get();
+ auto cond_exec = _executors->at(_cond_subg_index).get();
+ auto body_exec = _executors->at(_body_subg_index).get();
// Need a temp tensor to hold the cond subgraph output
assert(cond_exec->getOutputTensors().size() == 1);
diff --git a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h
index 912102781..8551b3d09 100644
--- a/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h
+++ b/runtime/onert/core/src/backend/builtin/kernel/WhileLayer.h
@@ -18,7 +18,7 @@
#define __ONERT_BACKEND_BUILTIN_KERNEL_WHILE_LAYER_H__
#include <backend/IPortableTensor.h>
-#include <exec/IExecutor.h>
+#include <exec/Executors.h>
#include <exec/IFunction.h>
#include <ir/OperandIndexSequence.h>
#include <ir/Graph.h>
@@ -41,7 +41,7 @@ public:
WhileLayer(const std::vector<backend::IPortableTensor *> input_tensors,
const std::vector<backend::IPortableTensor *> output_tensors,
const ir::SubgraphIndex &cond_subg_index, const ir::SubgraphIndex &body_subg_index,
- exec::ExecutorMap *executor_map, basic::DynamicMemoryManager *dyn_memory_manager,
+ exec::Executors *executors, basic::DynamicMemoryManager *dyn_memory_manager,
const std::shared_ptr<ExternalContext> &external_context);
public:
@@ -52,7 +52,7 @@ private:
const ir::SubgraphIndex _body_subg_index;
const std::vector<backend::IPortableTensor *> _input_tensors;
const std::vector<backend::IPortableTensor *> _output_tensors;
- exec::ExecutorMap *_executor_map;
+ exec::Executors *_executors;
basic::DynamicMemoryManager *_dyn_memory_manager; // For generating temp tensors
const std::shared_ptr<ExternalContext> _external_context;
};
diff --git a/runtime/onert/core/src/compiler/BackendManager.cc b/runtime/onert/core/src/compiler/BackendManager.cc
index 0d6051b21..44442c065 100644
--- a/runtime/onert/core/src/compiler/BackendManager.cc
+++ b/runtime/onert/core/src/compiler/BackendManager.cc
@@ -16,16 +16,11 @@
#include "compiler/BackendManager.h"
-#include <memory>
-#include <dlfcn.h>
+#include "../backend/builtin/Backend.h"
+#include "../backend/builtin/Config.h"
-#include "backend/Backend.h"
-#include "backend/builtin/Backend.h"
-#include "backend/builtin/Config.h"
-#include "backend/IConfig.h"
-#include "util/logging.h"
-#include "util/ConfigSource.h"
-#include "misc/string_helpers.h"
+#include <dlfcn.h>
+#include <memory>
static const char *SHARED_LIB_EXT =
#if defined(__APPLE__) && defined(__MACH__)
@@ -152,7 +147,7 @@ const backend::Backend *BackendManager::get(const std::string &key) const
return nullptr;
}
-const backend::builtin::Backend *BackendManager::getBuiltin() const { return _builtin; }
+const backend::Backend *BackendManager::getBuiltin() const { return _builtin; }
} // namespace compiler
} // namespace onert
diff --git a/runtime/onert/core/src/compiler/Compiler.cc b/runtime/onert/core/src/compiler/Compiler.cc
index 6a1d8fcec..7be9c1e3b 100644
--- a/runtime/onert/core/src/compiler/Compiler.cc
+++ b/runtime/onert/core/src/compiler/Compiler.cc
@@ -18,29 +18,27 @@
#include "ExecutorFactory.h"
#include "ShapeValidator.h"
+#include "pass/ConstantOutputPass.h"
+#include "pass/OddOutputPass.h"
+#include "pass/PassRunner.h"
+#include "pass/UnusedOperandEliminationPass.h"
+#include "../backend/builtin/Config.h"
+#include "../dumper/dot/DotDumper.h"
+#include "../interp/InterpExecutor.h"
+#include "../ir/OperationCloner.h"
+#include "../ir/OperationDumper.h"
+#include "../ir/verifier/Verifier.h"
-#include <backend/builtin/Config.h>
-#include "compiler/BackendManager.h"
-#include "compiler/IScheduler.h"
-#include "compiler/ManualScheduler.h"
-#include "compiler/HEScheduler.h"
#include "compiler/StaticShapeInferer.h"
-#include "compiler/OperationLowerInfo.h"
-#include "compiler/pass/ConstantOutputPass.h"
-#include "compiler/pass/OddOutputPass.h"
-#include "compiler/pass/PassRunner.h"
-#include "compiler/pass/UnusedOperandEliminationPass.h"
-#include "exec/ExecTime.h"
-#include "ir/verifier/Verifier.h"
-#include "dumper/dot/DotDumper.h"
-#include "compiler/Linear.h"
-#include "interp/InterpExecutor.h"
#include "util/ConfigSource.h"
#include "util/logging.h"
-#include "ir/OperationDumper.h"
-#include "ir/OperationCloner.h"
-#include "misc/string_helpers.h"
-#include "json/json.h"
+
+#include <misc/polymorphic_downcast.h>
+#include <misc/string_helpers.h>
+#include <json/json.h>
+
+// TODO Remove using fstream header
+#include <fstream>
namespace
{
@@ -86,8 +84,104 @@ void verboseOptions(compiler::CompilerOptions &options)
<< std::noboolalpha;
}
-void setBackendMap(compiler::ManualSchedulerOptions &ms_options, const ir::Subgraphs &subgs,
- const std::string &str)
+std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::StaticShapeInferer>>
+createStaticShapeInferers(
+ const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
+ &lowered_subgs)
+{
+ // Allocate StaticShapeInferer per each subgraph
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::StaticShapeInferer>> inferers;
+ for (auto &pair : lowered_subgs)
+ {
+ const auto &subg_index = pair.first;
+ auto &lowered_subg = pair.second;
+ inferers[subg_index] = std::make_unique<compiler::StaticShapeInferer>(lowered_subg.get());
+ }
+
+ // Append observers in all StaticShapeInferers
+ for (auto &pair : lowered_subgs)
+ {
+ const auto &subg_index = pair.first;
+ auto &lowered_subg = pair.second;
+
+ // TODO: Change this iteration for all to controlflow iteration
+ lowered_subg->graph().operations().iterate([&](const ir::OperationIndex &,
+ const ir::Operation &op) {
+ // A Function to append child inferers. These make it possible for a StaticShapeInferer to
+ // call StaticShapeInferers of child subgraphs recursively
+ auto appendChildInferer = [&](const ir::SubgraphIndex &child_subg_idx) {
+ auto *child_inferer = inferers.at(child_subg_idx).get();
+ inferers.at(subg_index)->appendChildInferer(child_subg_idx, child_inferer);
+ };
+
+ // A Function to append subg input observers. This makes it possible for a StaticShapeInferer
+ // to update inputs of child subgraphs
+ auto appendSubgraphInputObserver = [&](const ir::SubgraphIndex &child_subg_idx) {
+ std::vector<ir::Operand *> child_subg_inputs;
+ auto &child_subg = lowered_subgs.at(child_subg_idx)->graph();
+ for (const auto &input_idx : child_subg.getInputs())
+ {
+ auto operand_ptr = child_subg.operands().getRawPtr(input_idx);
+ child_subg_inputs.emplace_back(operand_ptr);
+ }
+ inferers.at(subg_index)
+ ->appendSubgInputObserver(child_subg_idx,
+ std::make_unique<compiler::OperandObserver>(child_subg_inputs));
+ };
+
+ // A Function to set controlflow output observers. This makes it possible for a
+ // StaticShapeInferer to update outputs of parent controlflow operations
+ auto setControlFlowOutputObserver = [&](const ir::SubgraphIndex &child_subg_idx) {
+ std::vector<ir::Operand *> cf_outputs;
+ auto &subg = lowered_subg->graph();
+ for (const auto &output_idx : op.getOutputs())
+ {
+ auto operand_ptr = subg.operands().getRawPtr(output_idx);
+ cf_outputs.emplace_back(operand_ptr);
+ }
+ inferers.at(child_subg_idx)
+ ->setControlflowOutputObserver(std::make_unique<compiler::OperandObserver>(cf_outputs));
+ };
+
+ // Append Observers in a StaticShapeInferer
+ if (op.opcode() == ir::OpCode::If)
+ {
+ const auto &if_op = nnfw::misc::polymorphic_downcast<const ir::operation::If &>(op);
+
+ appendChildInferer(if_op.param().then_subg_index);
+ appendChildInferer(if_op.param().else_subg_index);
+
+ appendSubgraphInputObserver(if_op.param().then_subg_index);
+ appendSubgraphInputObserver(if_op.param().else_subg_index);
+
+ setControlFlowOutputObserver(if_op.param().then_subg_index);
+ }
+ else if (op.opcode() == ir::OpCode::While)
+ {
+ const auto &while_op = nnfw::misc::polymorphic_downcast<const ir::operation::While &>(op);
+
+ appendChildInferer(while_op.param().cond_subg_index);
+ appendChildInferer(while_op.param().body_subg_index);
+
+ appendSubgraphInputObserver(while_op.param().cond_subg_index);
+ appendSubgraphInputObserver(while_op.param().body_subg_index);
+
+ setControlFlowOutputObserver(while_op.param().body_subg_index);
+ }
+ });
+ }
+
+ return inferers;
+}
+
+} // namespace
+
+namespace onert
+{
+
+namespace compiler
+{
+void ManualSchedulerOptions::setBackendMap(const std::string &str)
{
// TODO Support multiple subgraphs for manual scheduling
auto key_val_list = nnfw::misc::split(str, ';');
@@ -102,37 +196,24 @@ void setBackendMap(compiler::ManualSchedulerOptions &ms_options, const ir::Subgr
const auto &key_str = key_val.at(0);
const auto &val = key_val.at(1);
auto key = static_cast<uint32_t>(std::stoi(key_str));
-
- subgs.at(ir::SubgraphIndex{0})
- ->operations()
- .at(ir::OperationIndex{key}); // Check if exist, or this wil throw
- ms_options.index_to_backend.emplace(ir::OperationIndex{key}, val);
+ this->index_to_backend.emplace(ir::OperationIndex{key}, val);
}
}
-} // namespace
-
-namespace onert
-{
-
-namespace compiler
+std::unique_ptr<CompilerOptions> CompilerOptions::fromGlobalConfig()
{
-
-CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs)
-{
- CompilerOptions options;
- options.backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';');
- options.trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
- options.graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP);
- options.executor = util::getConfigString(util::config::EXECUTOR);
- options.he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER);
- options.he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE);
- options.disable_compile = util::getConfigBool(util::config::DISABLE_COMPILE);
- options.fp16_enable = util::getConfigBool(util::config::FP16_ENABLE);
-
+ auto o = std::make_unique<CompilerOptions>();
+ o->backend_list = nnfw::misc::split(util::getConfigString(util::config::BACKENDS), ';');
+ o->trace_filepath = util::getConfigString(util::config::TRACE_FILEPATH);
+ o->graph_dump_level = util::getConfigInt(util::config::GRAPH_DOT_DUMP);
+ o->executor = util::getConfigString(util::config::EXECUTOR);
+ o->he_scheduler = util::getConfigBool(util::config::USE_SCHEDULER);
+ o->he_profiling_mode = util::getConfigBool(util::config::PROFILING_MODE);
+ o->disable_compile = util::getConfigBool(util::config::DISABLE_COMPILE);
+ o->fp16_enable = util::getConfigBool(util::config::FP16_ENABLE);
{
// Backend for all
- auto &ms_options = options.manual_scheduler_options;
+ auto &ms_options = o->manual_scheduler_options;
// Default value for op_backend_all is first element in the backend list
ms_options.backend_for_all = util::getConfigString(util::config::OP_BACKEND_ALLOPS);
@@ -151,54 +232,67 @@ CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs)
// Index to Backend
auto map_str = util::getConfigString(util::config::OP_BACKEND_MAP);
- setBackendMap(ms_options, subgs, map_str);
+ ms_options.setBackendMap(map_str);
}
- return options;
+ return o;
}
-Compiler::Compiler(const std::shared_ptr<ir::Subgraphs> &subgs, util::TracingCtx *tracing_ctx)
- : _subgraphs{subgs}, _state{State::CREATED}
+Compiler::Compiler(const std::shared_ptr<ir::Model> &model, CompilerOptions &copt)
+ : _nnpkg{std::make_shared<ir::NNPkg>(model)}, _state{State::CREATED}, _voptions{&copt}
{
- // Set default values for CompilerOptions
- // All these default values should not be fetched from Env, when we stop supporting Android NN
- // API.
- _options = fetchCompilerOptionsFromGlobalConfig(*subgs);
-
- _options.tracing_ctx = tracing_ctx;
+ // DO NOTHING
}
-void Compiler::enableToFp16() { _options.fp16_enable = true; }
+Compiler::Compiler(const std::shared_ptr<ir::NNPkg> &nnpkg,
+ std::vector<std::unique_ptr<CompilerOptions>> &copts)
+ : _nnpkg{nnpkg}, _state{State::CREATED}, _voptions{}
+{
+ for (uint32_t i = 0; i < copts.size(); i++)
+ {
+ _voptions.push_back(copts[i].get());
+ }
+}
-void Compiler::set_backend_from_str(const char *backend_settings)
+void Compiler::enableToFp16()
{
- assert(_subgraphs != nullptr);
- // Backend for all
- auto &ms_options = _options.manual_scheduler_options;
- setBackendMap(ms_options, *_subgraphs, std::string{backend_settings});
+ for (auto options : _voptions)
+ options->fp16_enable = true;
}
void Compiler::checkProfilerConditions()
{
- if (!_options.he_scheduler)
+ if (_nnpkg->model_count() != 1)
+ throw std::runtime_error("NYI: Profiling mode for multiple model is not supported yet");
+
+ auto &options = *_voptions[0];
+
+ if (options.he_scheduler)
throw std::runtime_error("Heterogeneous scheduler must be enabled during profiling.");
- if (_options.executor != "Dataflow")
+ if (options.executor != "Dataflow")
throw std::runtime_error("Profiling mode works only with 'Dataflow' executor");
}
bool Compiler::buildPartialGraph(uint32_t num_graphs)
{
- if (_subgraphs->count() > 1)
+ // Use 1st model and options only on partial graph (pipeline) compile
+ assert(_nnpkg->model_count() == 1);
+ assert(_voptions.size() == 1);
+
+ auto model = _nnpkg->primary_model();
+ auto &options = *_voptions[0];
+
+ if (model->subgraphs_count() > 1)
return false;
- auto partialgraphs = std::make_shared<ir::Subgraphs>();
+ auto partialgraphs = std::make_shared<ir::Model>();
for (uint32_t idx = 0; idx < num_graphs; idx++)
{
auto partialgraph = std::make_unique<ir::Graph>();
partialgraphs->push(ir::SubgraphIndex{idx}, std::move(partialgraph));
}
- _subgraphs->primary()->setPartialgraphs(partialgraphs);
+ model->primary_subgraph()->setPartialModel(partialgraphs);
auto partial_graph = primary_subgraph()->partialgraphs();
@@ -208,8 +302,8 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs)
for (auto use_operation : use_operations)
{
- auto graph_index = _options.partial_graph_options.index_to_graph.find(use_operation);
- if (graph_index == _options.partial_graph_options.index_to_graph.end())
+ auto graph_index = options.partial_graph_options.index_to_graph.find(use_operation);
+ if (graph_index == options.partial_graph_options.index_to_graph.end())
{
throw std::runtime_error("Invalid Partition Map");
}
@@ -230,8 +324,8 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs)
primary_subgraph()->operations().iterate(
[&](const ir::OperationIndex &operation_index, const ir::Operation &operation) {
- auto graph_index = _options.partial_graph_options.index_to_graph.find(operation_index);
- if (graph_index == _options.partial_graph_options.index_to_graph.end())
+ auto graph_index = options.partial_graph_options.index_to_graph.find(operation_index);
+ if (graph_index == options.partial_graph_options.index_to_graph.end())
{
throw std::runtime_error("Invalid Partition Map");
}
@@ -259,7 +353,7 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs)
assert(new_operation_index == operation_index);
});
- for (uint32_t idx = 0; idx < partial_graph->count(); idx++)
+ for (uint32_t idx = 0; idx < partial_graph->subgraphs_count(); idx++)
{
auto partition = partial_graph->at(ir::SubgraphIndex{idx});
@@ -282,10 +376,10 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs)
auto use_operations = primary_subgraph()->operands().at(operand_index).getUses();
auto iter = use_operations.begin();
ir::SubgraphIndex graph_index =
- _options.partial_graph_options.index_to_graph.find(*iter++)->second;
+ options.partial_graph_options.index_to_graph.find(*iter++)->second;
while (iter != use_operations.end())
{
- if (graph_index != _options.partial_graph_options.index_to_graph.find(*iter)->second &&
+ if (graph_index != options.partial_graph_options.index_to_graph.find(*iter)->second &&
!partition->getOutputs().contains(operand_index))
{
partition->addOutput(operand_index,
@@ -344,96 +438,157 @@ bool Compiler::buildPartialGraph(uint32_t num_graphs)
return true;
}
-std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
+std::shared_ptr<CompilerArtifact> Compiler::compile(void)
{
- // Set control flow backend for control flow operators
+ for (auto options : _voptions)
{
+ // Set control flow backend for control flow operators
auto &builtin_id = backend::builtin::Config::ID;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
- }
+ options->manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
+ options->manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
+ options->manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
- // FIXME This is a workaround for bcq operations, should remove it
- {
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
+ // FIXME This is a workaround for bcq operations, should remove it
+ options->manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
+ options->manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
+
+ // FIXME This is a workaround for bulk operations, should remove it
+ options->manual_scheduler_options.opcode_to_backend[ir::OpCode::Bulk] = "trix";
+
+ verboseOptions(*options);
}
- verboseOptions(_options);
+ // NYI: allow one model compilation
+ auto const model_count = _nnpkg->model_count();
+ if (model_count != _voptions.size())
+ throw std::runtime_error{"Model count and option vector size mismatch"};
- _subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
- // Mandatory passes
- pass::PassRunner{}
- .append(std::make_unique<pass::ConstantOutputPass>(subg))
- .append(std::make_unique<pass::OddOutputPass>(subg))
- .run();
+ for (uint32_t i = 0; i < model_count; i++)
+ {
+ _nnpkg->model(ir::ModelIndex{i})->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
+ // Mandatory passes
+ pass::PassRunner{}
+ .append(std::make_unique<pass::ConstantOutputPass>(subg))
+ .append(std::make_unique<pass::OddOutputPass>(subg))
+ .run();
- // Optimizations
- pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run();
- });
+ // Optimizations
+ pass::PassRunner{}.append(std::make_unique<pass::UnusedOperandEliminationPass>(subg)).run();
+ });
+ }
/***************************************************
* Prepare compilation phase
***************************************************/
- auto executors = std::make_shared<exec::ExecutorMap>();
-
// Compilable check
// TODO: Support hybrid execution -
// execution between interpreter and compiled executor (including control flow)
- if (_options.disable_compile)
+ if (_voptions[0]->disable_compile)
{
- _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
+ if (model_count > 1)
+ throw std::runtime_error{"NYI: Disable compilation for multi model is not supported yet"};
+
+ auto executors = std::make_shared<exec::Executors>();
+
+ _nnpkg->primary_model()->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
executors->emplace(index, std::make_unique<interp::InterpExecutor>(subg));
});
_state = State::COMPILED;
- return executors;
+ return std::make_shared<CompilerArtifact>(executors, nullptr);
}
// Mode check
- if (_options.he_profiling_mode)
+ // TODO handle option for each model
+ if (_voptions[0]->he_profiling_mode)
checkProfilerConditions();
/***************************************************
* Backend independent analysis & optimization phase
***************************************************/
- auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options.graph_dump_level);
+ // TODO Handle dump level for each model
+ auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_voptions[0]->graph_dump_level);
+ onert::dumper::dot::DotDumper dot_dumper(dump_level);
+
+ // Tracing context
+ auto tracing_ctx = std::make_unique<util::TracingCtx>();
+
+ // Model edge context
+ std::unique_ptr<ir::ModelEdges> model_edges = nullptr;
// Lower: Assign backend
std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> lowered_subgs;
- _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
- onert::dumper::dot::DotDumper dot_dumper(subg, dump_level);
- dot_dumper.dump(nnfw::misc::str("before_lower_subg-", index.value()));
- // Lower: Assign backend
- lowered_subgs[index] = std::make_unique<compiler::LoweredGraph>(subg, _options);
+ if (model_count == 1)
+ {
+ _nnpkg->primary_model()->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
+ dot_dumper.dump(subg, nnfw::misc::str("before_lower_subg-", index.value()));
+ // Lower: Assign backend
+ lowered_subgs[index] = std::make_unique<compiler::LoweredGraph>(subg, *_voptions[0]);
+ // Set tracing_ctx for copied graph
+ tracing_ctx->setSubgraphIndex(&(lowered_subgs[index]->graph()), index.value());
+ });
+ }
+ else
+ {
+ // TODO Support tracing_ctx for multiple model
+ tracing_ctx = nullptr;
+
+ // Copy model edge context
+ model_edges = std::make_unique<ir::ModelEdges>(_nnpkg->model_edges());
- subg.setSubgraphs(nullptr);
- });
+ for (uint32_t i = 0; i < model_count; i++)
+ {
+ auto model = _nnpkg->model(ir::ModelIndex{i});
+ if (model->subgraphs_count() != 1)
+ throw std::runtime_error{"NYI: Lowering subgraphs for multiple model is not supported yet"};
+ auto subg = model->primary_subgraph();
+ dot_dumper.dump(*subg, nnfw::misc::str("before_lower_model-", i));
+
+ // For multimodel, model index is used for lowered graph index in lowered graph map
+ // and index type is SubgraphIndex
+ // TODO Find better way to represent lowered graph index for multimodel's subgraph
+ lowered_subgs[ir::SubgraphIndex{i}] =
+ std::make_unique<compiler::LoweredGraph>(*model->primary_subgraph(), *_voptions[i]);
+ }
+ }
- _subgraphs.reset();
+ _nnpkg.reset();
for (auto &pair : lowered_subgs)
{
const auto &subg_index = pair.first;
auto &lowered_subg = pair.second;
- onert::dumper::dot::DotDumper dot_dumper_lowered(lowered_subg.get(), dump_level);
- dot_dumper_lowered.dump("after_lower_subg-" + std::to_string(subg_index.value()));
+ dot_dumper.dump(*lowered_subg, "after_lower_subg-" + std::to_string(subg_index.value()));
}
// Shape inference.
{
- const auto primary_subg_idx = ir::SubgraphIndex{0};
- StaticShapeInferer inferer(primary_subg_idx, lowered_subgs);
- auto &lowered_subg = lowered_subgs.at(primary_subg_idx);
- auto ordered_ops = lowered_subg->graph().topolSortOperations();
- for (auto op_ind : ordered_ops)
+ // Run the StaticShapeInferer of primary subg. All child StaticShapeInferers are called
+ // recursively
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers =
+ createStaticShapeInferers(lowered_subgs);
+
+ if (model_count == 1)
{
- const auto &op = lowered_subg->graph().operations().at(op_ind);
- bool has_dynamic_tensor = inferer.infer(op);
- lowered_subg->setHasDynamicTensor(op_ind, has_dynamic_tensor);
+ const auto primary_subg_idx = ir::SubgraphIndex{0};
+ inferers.at(primary_subg_idx)->infer();
+
+ for (const auto &pair : inferers)
+ {
+ const auto inferer = pair.second.get();
+ inferer->dump();
+ }
+ }
+ else
+ {
+ // Assume each model of a multi-model package has only one subgraph
+ for (const auto &pair : inferers)
+ {
+ const auto inferer = pair.second.get();
+ inferer->infer();
+ inferer->dump();
+ }
}
- inferer.dump();
}
// Shape validation
@@ -452,8 +607,7 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
/*************************************************************
* Backend independent analysis & optimization phase finished
*************************************************************/
-
- executors = std::make_shared<exec::ExecutorMap>();
+ auto executors = std::make_shared<exec::Executors>(std::move(model_edges));
for (auto &pair : lowered_subgs)
{
const auto &subg_index = pair.first;
@@ -464,24 +618,31 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
std::to_string(subg_index.value()));
lowered_subg->graph().operations().iterate(
[&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
- auto executor = std::unique_ptr<exec::IExecutor>{
- ExecutorFactory::get().create(std::move(lowered_subg), _options, executors)};
+
+ auto &options = (model_count > 1) ? *_voptions[subg_index.value()] : *_voptions[0];
+ auto executor = std::unique_ptr<exec::IExecutor>{ExecutorFactory::get().create(
+ std::move(lowered_subg), tracing_ctx.get(), options, executors)};
executor->setIndexedRanks(indexed_ranks);
- executors->insert(std::make_pair(subg_index, std::move(executor)));
+ executors->emplace(subg_index, std::move(executor));
}
/********************************
* Code generation phase finished
********************************/
_state = State::COMPILED;
- return executors;
+ return std::make_shared<CompilerArtifact>(executors, std::move(tracing_ctx));
}
-std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *package_file_path,
- const char *map_file_path)
+std::vector<std::shared_ptr<CompilerArtifact>> Compiler::compile(const char *package_file_path,
+ const char *map_file_path)
{
- std::vector<std::shared_ptr<exec::ExecutorMap>> executors;
- auto executor_map = std::make_shared<exec::ExecutorMap>();
+ // Allow one model compilation for pipeline
+ if (_nnpkg->model_count() != 1)
+ throw std::runtime_error{"Multiple models compilation for pipeline is not supported yet."};
+ assert(_voptions.size() == 1);
+
+ auto model = _nnpkg->primary_model();
+ auto &options = *_voptions[0];
std::string package_path(package_file_path);
std::string partition_map_file;
@@ -508,7 +669,7 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa
num_graphs = np.asUInt();
for (uint32_t i = 0; i < (uint32_t)map.size(); ++i)
{
- _options.partial_graph_options.index_to_graph[ir::OperationIndex{i}] =
+ options.partial_graph_options.index_to_graph[ir::OperationIndex{i}] =
ir::SubgraphIndex{map[i].asUInt()};
}
}
@@ -525,25 +686,25 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa
// Set control flow backend for control flow operators
{
auto &builtin_id = backend::builtin::Config::ID;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
+ options.manual_scheduler_options.opcode_to_backend[ir::OpCode::If] = builtin_id;
+ options.manual_scheduler_options.opcode_to_backend[ir::OpCode::While] = builtin_id;
+ options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Permute] = builtin_id;
}
// FIXME This is a workaround for bcq operations, should remove it
{
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
- _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
+ options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq";
+ options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
}
- // It doesn't support tracing in case of partial graph
+ // FIXME This is a workaround for bulk operations, should remove it
{
- _options.tracing_ctx = nullptr;
+ options.manual_scheduler_options.opcode_to_backend[ir::OpCode::Bulk] = "trix";
}
- verboseOptions(_options);
+ verboseOptions(options);
- _subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
+ model->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
// Mandatory passes
auto part = subg.partialgraphs();
part->iterate([&](const ir::SubgraphIndex &, ir::Graph &partialgraph) {
@@ -566,38 +727,41 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa
// Compilable check
// TODO: Support hybrid execution -
// execution between interpreter and compiled executor (including control flow)
- if (_options.disable_compile)
+ if (options.disable_compile)
{
- _subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
- executor_map->emplace(index, std::make_unique<interp::InterpExecutor>(subg));
- executors.push_back(executor_map);
+ std::vector<std::shared_ptr<CompilerArtifact>> results;
+ auto executors = std::make_shared<exec::Executors>();
+
+ model->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) {
+ executors->emplace(index, std::make_unique<interp::InterpExecutor>(subg));
});
+ results.push_back(std::make_shared<CompilerArtifact>(executors, nullptr));
_state = State::COMPILED;
- return executors;
+ return results;
}
// Mode check
- if (_options.he_profiling_mode)
+ if (options.he_profiling_mode)
checkProfilerConditions();
/***************************************************
* Backend independent analysis & optimization phase
***************************************************/
- auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options.graph_dump_level);
+ auto dump_level = static_cast<dumper::dot::DotDumper::Level>(options.graph_dump_level);
+ onert::dumper::dot::DotDumper dot_dumper_part(dump_level);
// Lower: Assign backend
std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>>
lowered_partialgraphs;
- _subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
+ model->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
auto part = subg.partialgraphs();
part->iterate([&](const ir::SubgraphIndex &pindex, ir::Graph &partialgraph) {
- onert::dumper::dot::DotDumper dot_dumper_part(partialgraph, dump_level);
- dot_dumper_part.dump(nnfw::misc::str("before_lower_subg_partialgraph-", pindex.value()));
+ dot_dumper_part.dump(partialgraph,
+ nnfw::misc::str("before_lower_subg_partialgraph-", pindex.value()));
// // Lower: Assign backend
lowered_partialgraphs[pindex] =
- std::make_unique<compiler::LoweredGraph>(subg, partialgraph, _options);
- partialgraph.setSubgraphs(nullptr);
+ std::make_unique<compiler::LoweredGraph>(subg, partialgraph, options);
});
});
@@ -606,25 +770,20 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa
const auto &partialgraph_index = pair.first;
auto &lowered_partialgraph = pair.second;
- onert::dumper::dot::DotDumper dot_dumper_lowered_part(lowered_partialgraph.get(), dump_level);
- dot_dumper_lowered_part.dump("after_lower_subg_partialgraph-" +
- std::to_string(partialgraph_index.value()));
+ dot_dumper_part.dump(*lowered_partialgraph, "after_lower_subg_partialgraph-" +
+ std::to_string(partialgraph_index.value()));
}
// Partial Graph shape inference
+ std::unordered_map<ir::SubgraphIndex, std::unique_ptr<StaticShapeInferer>> inferers =
+ createStaticShapeInferers(lowered_partialgraphs);
+ // NOTE If partialgraph has subgraphs StaticShapeInferer may be called multiple times
for (auto &pair : lowered_partialgraphs)
{
const auto &partialgraph_index = pair.first;
- auto &lowered_partialgraph = pair.second;
- StaticShapeInferer partial_inferer(partialgraph_index, lowered_partialgraphs);
- auto ordered_ops = lowered_partialgraph->graph().topolSortOperations();
- for (auto op_ind : ordered_ops)
- {
- const auto &op = lowered_partialgraph->graph().operations().at(op_ind);
- bool has_dynamic_tensor = partial_inferer.infer(op);
- lowered_partialgraph->setHasDynamicTensor(op_ind, has_dynamic_tensor);
- }
- partial_inferer.dump();
+ const auto partial_inferer = inferers.at(partialgraph_index).get();
+ partial_inferer->infer();
+ partial_inferer->dump();
}
// Shape validation
@@ -652,9 +811,11 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa
ordered.insert(make_pair(pair.first.value(), std::move(lowered_partialgraph)));
}
+ std::vector<std::shared_ptr<CompilerArtifact>> results;
for (auto &pair : ordered)
{
- executor_map = std::make_shared<exec::ExecutorMap>();
+ auto executors = std::make_shared<exec::Executors>();
+
const auto &partialgraph_index = ir::SubgraphIndex(pair.first);
auto &lowered_partialgraph = pair.second;
auto indexed_ranks = lowered_partialgraph->indexed_ranks();
@@ -663,19 +824,21 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa
lowered_partialgraph->graph().operations().iterate(
[&](const ir::OperationIndex &, const ir::Operation &op) { op.accept(dumper); });
auto executor = std::unique_ptr<exec::IExecutor>{
- ExecutorFactory::get().create(std::move(lowered_partialgraph), _options, executor_map)};
+ ExecutorFactory::get().create(std::move(lowered_partialgraph), nullptr, options, executors)};
executor->setIndexedRanks(indexed_ranks);
- executor_map->insert(std::make_pair(ir::SubgraphIndex{0}, std::move(executor)));
- executors.push_back(executor_map);
+ executors->emplace(ir::SubgraphIndex{0}, std::move(executor));
+
+ // It doesn't support tracing in case of partial graph
+ results.push_back(std::make_shared<CompilerArtifact>(executors, nullptr));
}
- _subgraphs.reset();
+ _nnpkg.reset();
/********************************
* Code generation phase finished
********************************/
_state = State::COMPILED;
- return executors;
+ return results;
}
} // namespace compiler
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc
index f9db1ca89..024556e7e 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.cc
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc
@@ -16,23 +16,22 @@
#include "ExecutorFactory.h"
-#include "backend/builtin/Config.h"
-#include "backend/builtin/KernelGenerator.h"
-#include "backend/builtin/TensorBuilder.h"
-#include "backend/builtin/UserTensor.h"
-#include "backend/IPortableTensor.h"
-#include "compiler/BackendManager.h"
-#include "compiler/BackendManager.h"
-#include "compiler/ExecutionBuilder.h"
-#include "compiler/Linear.h"
-#include "dumper/text/GraphDumper.h"
-#include "exec/DataflowExecutor.h"
-#include "exec/ExecTime.h"
-#include "exec/ExecutionObservers.h"
-#include "exec/LinearExecutor.h"
-#include "exec/ParallelExecutor.h"
-#include "ir/OperationCloner.h"
-#include "util/TracingCtx.h"
+#include "Linear.h"
+#include "../backend/builtin/BackendContext.h"
+#include "../backend/builtin/Config.h"
+#include "../backend/builtin/UserTensor.h"
+#include "../dumper/text/GraphDumper.h"
+#include "../exec/DataflowExecutor.h"
+#include "../exec/ExecTime.h"
+#include "../exec/ExecutionObservers.h"
+#include "../exec/LinearExecutor.h"
+#include "../exec/ParallelExecutor.h"
+#include "../ir/OperationCloner.h"
+
+#include <backend/IPortableTensor.h>
+#include <compiler/BackendManager.h>
+#include <compiler/ExecutionBuilder.h>
+#include <util/TracingCtx.h>
#include <functional>
#include <memory>
@@ -242,16 +241,17 @@ ExecutorFactory::ExecutorFactory()
{
_map["Linear"] = createLinearExecutor;
_map["Dataflow"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
- std::placeholders::_3, false);
+ std::placeholders::_3, std::placeholders::_4, false);
_map["Parallel"] = std::bind(createDataflowExecutor, std::placeholders::_1, std::placeholders::_2,
- std::placeholders::_3, true);
+ std::placeholders::_3, std::placeholders::_4, true);
}
exec::IExecutor *ExecutorFactory::create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ const util::TracingCtx *tracing_ctx,
const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map)
+ const std::shared_ptr<exec::Executors> &executors)
{
- return _map.at(options.executor)(std::move(lowered_graph), options, executor_map);
+ return _map.at(options.executor)(std::move(lowered_graph), tracing_ctx, options, executors);
}
void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_graph,
@@ -282,7 +282,7 @@ void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_grap
}
void ExecutorFactory::prepareBuiltinBackend(const TensorRegistries &tensor_regs,
- const std::shared_ptr<exec::ExecutorMap> &executor_map,
+ const std::shared_ptr<exec::Executors> &executors,
const backend::BackendContexts &backend_contexts)
{
for (auto &pair : backend_contexts)
@@ -292,7 +292,7 @@ void ExecutorFactory::prepareBuiltinBackend(const TensorRegistries &tensor_regs,
{
auto builtin_kernel_gen = builtin_context->kernel_gen;
builtin_kernel_gen->setTensorRegistries(tensor_regs);
- builtin_kernel_gen->setExecutorMap(executor_map);
+ builtin_kernel_gen->setExecutors(executors);
}
}
}
@@ -317,12 +317,11 @@ ExecutorFactory::orderBackendContext(const backend::BackendContexts &backend_con
return ordered_contexts;
}
-exec::IExecutor *
-ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map)
+exec::IExecutor *ExecutorFactory::createLinearExecutor(
+ std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx,
+ const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors)
{
- auto graph = lowered_graph->graph();
+ auto &graph = lowered_graph->graph();
backend::BackendContexts backend_contexts =
createBackendContexts(*lowered_graph, options.executor == "Linear");
@@ -346,7 +345,7 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo
prepareMigrantTensors(*lowered_graph, backend_contexts);
// Give some runtime objects to builtin KernelGenerator
- prepareBuiltinBackend(tensor_regs, executor_map, backend_contexts);
+ prepareBuiltinBackend(tensor_regs, executors, backend_contexts);
ExecutionBuilder builder;
@@ -426,14 +425,17 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo
auto code_map = builder.releaseCodeMap();
- auto exec = new exec::LinearExecutor{
- std::move(lowered_graph), std::move(backend_contexts), tensor_regs, std::move(code_map), order,
- options.tracing_ctx};
+ auto exec = new exec::LinearExecutor{std::move(lowered_graph),
+ std::move(backend_contexts),
+ tensor_regs,
+ std::move(code_map),
+ order,
+ tracing_ctx};
if (!options.trace_filepath.empty())
{
- std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>(
- options.trace_filepath, exec->graph(), options.tracing_ctx);
+ std::unique_ptr<exec::IExecutionObserver> ctp =
+ std::make_unique<exec::TracingObserver>(options.trace_filepath, exec->graph(), tracing_ctx);
exec->addObserver(std::move(ctp));
}
@@ -441,8 +443,9 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo
}
exec::IExecutor *ExecutorFactory::createDataflowExecutor(
- std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel)
+ std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx,
+ const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors,
+ bool parallel)
{
backend::BackendContexts backend_contexts =
createBackendContexts(*lowered_graph, options.executor == "Linear");
@@ -462,7 +465,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
prepareMigrantTensors(*lowered_graph, backend_contexts);
// Give some runtime objects to builtin KernelGenerator
- prepareBuiltinBackend(tensor_regs, executor_map, backend_contexts);
+ prepareBuiltinBackend(tensor_regs, executors, backend_contexts);
ExecutionBuilder builder;
@@ -491,13 +494,13 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
if (parallel)
{
exec = new exec::ParallelExecutor{std::move(lowered_graph), std::move(backend_contexts),
- tensor_regs, std::move(code_map), options.tracing_ctx};
+ tensor_regs, std::move(code_map), tracing_ctx};
}
else
{
auto dataflow_exec =
new exec::DataflowExecutor{std::move(lowered_graph), std::move(backend_contexts), tensor_regs,
- std::move(code_map), options.tracing_ctx};
+ std::move(code_map), tracing_ctx};
if (options.he_profiling_mode)
{
std::vector<const backend::Backend *> backends;
@@ -515,8 +518,8 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
if (!options.trace_filepath.empty())
{
- std::unique_ptr<exec::IExecutionObserver> ctp = std::make_unique<exec::TracingObserver>(
- options.trace_filepath, exec->graph(), options.tracing_ctx);
+ std::unique_ptr<exec::IExecutionObserver> ctp =
+ std::make_unique<exec::TracingObserver>(options.trace_filepath, exec->graph(), tracing_ctx);
exec->addObserver(std::move(ctp));
}
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.h b/runtime/onert/core/src/compiler/ExecutorFactory.h
index 2ee05fae3..70c089f8c 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.h
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.h
@@ -21,7 +21,7 @@
#include "backend/ITensor.h"
#include "compiler/LoweredGraph.h"
-#include "exec/IExecutor.h"
+#include "exec/Executors.h"
#include <deque>
#include <unordered_map>
@@ -38,8 +38,9 @@ public:
public:
exec::IExecutor *create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ const util::TracingCtx *tracing_ctx,
const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map);
+ const std::shared_ptr<exec::Executors> &executors);
private:
ExecutorFactory();
@@ -48,25 +49,26 @@ private:
static void prepareMigrantTensors(compiler::LoweredGraph &lowered_graph,
const backend::BackendContexts &backend_contexts);
static void prepareBuiltinBackend(const TensorRegistries &tensor_regs,
- const std::shared_ptr<exec::ExecutorMap> &executor_map,
+ const std::shared_ptr<exec::Executors> &executors,
const backend::BackendContexts &backend_contexts);
static std::deque<std::pair<const backend::Backend *, backend::BackendContext *>>
orderBackendContext(const backend::BackendContexts &backend_contexts);
- static exec::IExecutor *
- createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map);
+ static exec::IExecutor *createLinearExecutor(
+ std::unique_ptr<compiler::LoweredGraph> lowered_graph, const util::TracingCtx *tracing_ctx,
+ const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors);
static exec::IExecutor *
createDataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
+ const util::TracingCtx *tracing_ctx,
const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel);
+ const std::shared_ptr<exec::Executors> &executors, bool parallel);
private:
- std::unordered_map<std::string, std::function<exec::IExecutor *(
- std::unique_ptr<compiler::LoweredGraph>,
- const compiler::CompilerOptions &options,
- const std::shared_ptr<exec::ExecutorMap> &executor_map)>>
+ std::unordered_map<
+ std::string,
+ std::function<exec::IExecutor *(
+ std::unique_ptr<compiler::LoweredGraph>, const util::TracingCtx *tracing_ctx,
+ const compiler::CompilerOptions &options, const std::shared_ptr<exec::Executors> &executors)>>
_map;
};
diff --git a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
index 5c1cef1ab..98dc906e4 100644
--- a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
+++ b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
@@ -180,7 +180,7 @@ void Fp32ToFp16Converter::appendOpSequences()
{
_lowered_graph.op_seqs().iterate(
[&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
// For now, the only acl_cl supports fully fp16 type
@@ -375,7 +375,7 @@ void Fp32ToFp16Converter::convertOperands()
{
_lowered_graph.op_seqs().iterate(
[&](const ir::OpSequenceIndex &op_seq_ind, ir::OpSequence &op_seq) {
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
// For now, the only acl_cl supports fully fp16
if (lower_info->backend()->config()->id() != kAclClBackendConfigId)
@@ -515,7 +515,7 @@ ir::OperandIndex Fp32ToFp16Converter::newCopiedOperand(const ir::OperandIndex &o
void Fp32ToFp16Converter::setNewOperandLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
const ir::OperandIndex &new_op_ind)
{
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
auto new_lower_info = std::make_unique<compiler::OperandLowerInfo>();
auto permute_factor = compiler::PermuteFactor(lower_info->backend(), lower_info->layout());
@@ -527,7 +527,7 @@ void Fp32ToFp16Converter::setNewOperandLowerInfo(const ir::OpSequenceIndex &op_s
void Fp32ToFp16Converter::setNewOperationLowerInfo(const ir::OpSequenceIndex &op_seq_ind,
const ir::OpSequenceIndex &new_op_seq_ind)
{
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
auto new_lower_info =
@@ -635,7 +635,7 @@ ir::OpSequenceIndex Fp32ToFp16Converter::newOpSequence(const ir::OpSequenceIndex
const ir::OperationIndex &node_index)
{
auto &node = _lowered_graph.graph().operations().at(node_index);
- const auto lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
+ const auto &lower_info = _lowered_graph.getLowerInfo(op_seq_ind);
assert(lower_info != nullptr);
auto layout = lower_info->layout();
diff --git a/runtime/onert/core/src/compiler/HEScheduler.cc b/runtime/onert/core/src/compiler/HEScheduler.cc
index 2f996c8e8..c4bfddb8f 100644
--- a/runtime/onert/core/src/compiler/HEScheduler.cc
+++ b/runtime/onert/core/src/compiler/HEScheduler.cc
@@ -14,17 +14,14 @@
* limitations under the License.
*/
-#include "ir/Operand.h"
-#include "compiler/HEScheduler.h"
-#include "ir/Graph.h"
-#include "util/ConfigSource.h"
+#include "HEScheduler.h"
+
#include "compiler/BackendResolver.h"
+#include "ir/Graph.h"
#include "util/logging.h"
-#include "util/Utils.h"
-#include "exec/FunctionSequence.h"
+
#include <cassert>
#include <cmath>
-#include <chrono>
namespace
{
diff --git a/runtime/onert/core/src/compiler/HEScheduler.h b/runtime/onert/core/src/compiler/HEScheduler.h
index 1a95b9881..18ea388fd 100644
--- a/runtime/onert/core/src/compiler/HEScheduler.h
+++ b/runtime/onert/core/src/compiler/HEScheduler.h
@@ -23,14 +23,16 @@
#ifndef __ONERT_COMPILER_H_E_SCHEDULER_H_
#define __ONERT_COMPILER_H_E_SCHEDULER_H_
-#include "compiler/IScheduler.h"
-#include "compiler/BackendManager.h"
-#include "compiler/Compiler.h"
-#include "ir/Graph.h"
-#include "exec/ExecTime.h"
-#include "backend/Backend.h"
-#include <memory>
-#include "ir/OperationIndexMap.h"
+#include "IScheduler.h"
+#include "../backend/builtin/Config.h"
+#include "../exec/ExecTime.h"
+
+#include <backend/Backend.h>
+#include <compiler/BackendManager.h>
+#include <compiler/Compiler.h>
+#include <ir/Graph.h>
+#include <ir/OperationIndexMap.h>
+
#include <map>
#include <memory>
diff --git a/runtime/onert/core/src/compiler/HEScheduler.test.cc b/runtime/onert/core/src/compiler/HEScheduler.test.cc
new file mode 100644
index 000000000..c4a2df025
--- /dev/null
+++ b/runtime/onert/core/src/compiler/HEScheduler.test.cc
@@ -0,0 +1,572 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "HEScheduler.h"
+#include "../exec/ExecTime.h"
+
+#include <ir/DataType.h>
+#include <ir/InternalType.h>
+#include <ir/Shape.h>
+#include <ir/TypeInfo.h>
+#include <ir/operation/BinaryArithmetic.h>
+#include <ir/operation/FullyConnected.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+using namespace onert;
+using namespace ir;
+using namespace backend;
+using namespace operation;
+using namespace exec;
+
+//
+// Mock backends classes
+//
+
+struct MockConfigCPU : public IConfig
+{
+ std::string id() override { return "cpu"; }
+ bool initialize() override { return true; };
+ bool supportPermutation() override { return false; }
+ Layout supportLayout(const Operation &, Layout) override { return Layout::UNKNOWN; }
+ bool supportDynamicTensor() override { return false; }
+ bool supportFP16() override { return false; }
+};
+
+class MockBackendContext : public BackendContext
+{
+public:
+ using BackendContext::BackendContext;
+ ITensorRegistry *genTensors() override { return nullptr; }
+ FunctionMap genKernels() override { return {}; }
+};
+
+struct MockBackendCPU : public Backend
+{
+ std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigCPU>(); }
+ std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
+ {
+ return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
+ }
+};
+
+struct MockConfigGPU : public IConfig
+{
+ std::string id() override { return "gpu"; }
+ bool initialize() override { return true; };
+ bool supportPermutation() override { return false; }
+ ir::Layout supportLayout(const ir::Operation &, ir::Layout) override
+ {
+ return ir::Layout::UNKNOWN;
+ }
+ bool supportDynamicTensor() override { return false; }
+ bool supportFP16() override { return false; }
+};
+
+struct MockBackendGPU : public Backend
+{
+ std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigGPU>(); }
+ std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
+ {
+ return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
+ }
+};
+
+struct MockConfigNPU : public IConfig
+{
+ std::string id() override { return "npu"; }
+ bool initialize() override { return true; };
+ bool supportPermutation() override { return false; }
+ ir::Layout supportLayout(const ir::Operation &, ir::Layout) override
+ {
+ return ir::Layout::UNKNOWN;
+ }
+ bool supportDynamicTensor() override { return false; }
+ bool supportFP16() override { return false; }
+};
+
+struct MockBackendNPU : public Backend
+{
+ std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigNPU>(); }
+ std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
+ {
+ return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
+ }
+};
+
+//
+// Constants
+//
+
+const int OPERAND_ELEMS = 268203;
+const int OPERAND_SIZE = OPERAND_ELEMS * 4;
+const int OPERATION_SIZE = OPERAND_SIZE * 3;
+
+const std::string LINEAR("Linear");
+const std::string DATAFLOW("Dataflow");
+const std::string PARALLEL("Parallel");
+
+//
+// Helper functions
+//
+
+// Set executor through environment variable
+void setExecutor(const std::string &executor) { setenv("EXECUTOR", executor.c_str(), true); }
+
+// Set profiling mode through environment variable
+void setProfilingMode(const bool value) { setenv("PROFILING_MODE", value ? "1" : "0", true); }
+
+// Calculate operation size by addition sizes of all input and output operands
+uint32_t calcOpSize(const std::shared_ptr<Graph> &graph, const OperationIndex &op_idx)
+{
+ uint32_t size = 0;
+ const auto &op = graph->operations().at(op_idx);
+ for (const auto &ind : op.getInputs() + op.getOutputs())
+ size += graph->operands().at(ind).info().total_size();
+ return size;
+}
+
+// Set execution operation time. This method is needed since ExecutionTime has only
+// 'updateOperationExecTime' method.
+void setOperationExecTime(ExecTime &et, const Backend *backend, const std::string &operation,
+ bool quant, uint32_t op_size, int64_t time)
+{
+ // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
+ assert(time > 0);
+ int64_t prev_time = et.getOperationExecTime(backend, operation, quant, op_size);
+ int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
+ et.updateOperationExecTime(backend, operation, quant, op_size, time_to_set);
+ assert(et.getOperationExecTime(backend, operation, quant, op_size) == time);
+}
+
+// Set same execution time for all given backends/operations
+void setOperationsExecutionTime(const std::vector<const Backend *> &backends,
+ const std::vector<std::string> &op_names,
+ const std::vector<uint32_t> &op_sizes, int64_t exec_time)
+{
+ assert(op_names.size() == op_sizes.size());
+ ExecTime et(backends);
+ for (int i = 0; i < op_names.size(); ++i)
+ {
+ for (auto &backend : backends)
+ setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time);
+ }
+ et.storeOperationsExecTime();
+}
+
+// Set permute time from one backend to another. This method is needed since ExecutionTime has only
+// 'updatePermuteTime' method.
+void setPermutationTime(ExecTime &et, const Backend *from_backend, const Backend *to_backend,
+ bool quant, uint32_t op_size, int64_t time)
+{
+ // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
+ assert(time > 0);
+ int64_t prev_time = et.getPermuteTime(from_backend, to_backend, quant, op_size);
+ int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
+ et.updatePermuteTime(from_backend, to_backend, quant, op_size, time_to_set);
+ assert(et.getPermuteTime(from_backend, to_backend, quant, op_size) == time);
+}
+
+// Set same permutation time between all given backends
+void setPermutationsExecutionTime(const std::vector<const Backend *> &backends,
+ const int operand_size, const int64_t exec_time)
+{
+ ExecTime et(backends);
+ for (const auto &backend : backends)
+ {
+ for (auto &other_backend : backends)
+ {
+ if (backend == other_backend)
+ continue;
+ setPermutationTime(et, backend, other_backend, false, operand_size, exec_time);
+ }
+ }
+ et.storeOperationsExecTime();
+}
+
+//
+// Functions for creating graphs
+//
+
+using OIS = OperandIndexSequence;
+
+template <typename NodeT, typename... Types>
+OperationIndex create(std::shared_ptr<Graph> graph, Types &&... args)
+{
+ auto op = std::make_unique<NodeT>(std::forward<Types>(args)...);
+ auto op_idx = graph->addOperation(std::move(op));
+ // For now in scheduler test all operations in tested graphs has same size (for simplicity)
+ assert(calcOpSize(graph, op_idx) == OPERATION_SIZE);
+ return op_idx;
+}
+
+// Create straight graph: Add->Sub->Mul
+std::shared_ptr<Graph> createStraightGraph()
+{
+ auto graph = std::make_shared<Graph>();
+ const TypeInfo float_op(DataType::FLOAT32);
+
+ // Create add node
+ auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
+
+ // Create sub node
+ auto sub_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx}, sub_op_params);
+
+ // Create mul node
+ auto mul_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto mul_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param mul_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx}, mul_op_params);
+
+ graph->verify();
+ return graph;
+}
+
+/* Create branched graph:
+ * [Add]
+ * // \\
+ * [Mul1] [FC2]
+ * || ||
+ * [Mul2] [FC2]
+ * \\ //
+ * [Sub]
+ */
+std::shared_ptr<Graph> createBranchedGraph()
+{
+ auto graph = std::make_shared<Graph>();
+ const TypeInfo float_op(DataType::FLOAT32);
+
+ // Create add node
+ auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
+
+ // Create mul1 node
+ auto mul1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto mul1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param mul1_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx},
+ mul1_op_params);
+
+ // Create mul2 node
+ auto mul2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto mul2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param mul2_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx},
+ mul2_op_params);
+
+ // Create fc1 node
+ auto fc1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto fc1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ FullyConnected::Param fc1_op_params{Activation::NONE};
+ create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx}, fc1_op_params);
+
+ // Create fc2 node
+ auto fc2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ auto fc2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ FullyConnected::Param fc2_op_params{Activation::NONE};
+ create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx}, fc2_op_params);
+
+ // Create sub node
+ auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
+ BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
+ create<BinaryArithmetic>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx}, sub_op_params);
+
+ graph->verify();
+ return graph;
+}
+
+//
+// Tests setup/teardown
+//
+
+// SetUp/TearDown methods runs before/after each test and performs actions common for each test
+class HESchedulerTest : public ::testing::Test
+{
+protected:
+ void SetUp() override
+ {
+ // Initialize mock backends
+ _cpu_backend = new MockBackendCPU();
+ _gpu_backend = new MockBackendGPU();
+ _npu_backend = new MockBackendNPU();
+ _mock_backends = {_cpu_backend, _gpu_backend, _npu_backend};
+
+ // Remove previous profile data if it exists
+ if (!remove("exec_time.json"))
+ {
+ // DO NOTHING (no profile data)
+ }
+
+ // Remember original value of 'EXECUTOR' environment variable
+ char *executor = std::getenv("EXECUTOR");
+ _original_executor = executor == nullptr ? "" : executor;
+
+ // Remember original value of 'PROFILING_MODE' environment variable
+ char *profiling_mode = std::getenv("PROFILING_MODE");
+ _original_profiling_mode = profiling_mode == nullptr ? "" : profiling_mode;
+ }
+
+ void TearDown() override
+ {
+ delete _cpu_backend;
+ delete _gpu_backend;
+ delete _npu_backend;
+ EXPECT_EQ(remove("exec_time.json"), 0);
+ setenv("EXECUTOR", _original_executor.c_str(), true);
+ setenv("PROFILING_MODE", _original_profiling_mode.c_str(), true);
+ }
+
+ const MockBackendCPU *_cpu_backend{nullptr};
+ const MockBackendGPU *_gpu_backend{nullptr};
+ const MockBackendNPU *_npu_backend{nullptr};
+ std::vector<const Backend *> _mock_backends;
+
+ std::string _original_executor;
+ std::string _original_profiling_mode;
+};
+
+//
+// HEScheduler tests
+//
+
+class HESchedulerTestWithExecutorParam : public HESchedulerTest,
+ public testing::WithParamInterface<std::string>
+{
+};
+
+// SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times -
+// one time for each executor
+INSTANTIATE_TEST_SUITE_P(AllExecutors, HESchedulerTestWithExecutorParam,
+ testing::Values(LINEAR, DATAFLOW, PARALLEL));
+
+// Test scheduler behavior for straight graph with known execution time of all nodes and permutes.
+TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time)
+{
+ setExecutor(GetParam());
+
+ // Prepare graph
+ ir::Model model;
+ auto graph(createStraightGraph());
+ model.push(ir::SubgraphIndex{0}, graph);
+ OperationIndex add_op_idx(0), sub_op_idx(1), mul_op_idx(2);
+
+ // Set default execution and transfer time
+ setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1);
+ setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul"},
+ {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
+
+ // Test 1
+ // Expected behaviour: scheduler assigns different backend to each node
+ {
+ // For each backend reduce execution time of one node
+ ExecTime et(_mock_backends);
+ setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, 1);
+ setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, 1);
+ setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, 1);
+ et.storeOperationsExecTime();
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "gpu");
+ ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "npu");
+ }
+
+ // Test 2
+ // Expected behaviour: scheduler assigns single backend to all nodes because of big transfer time
+ {
+ // Increase transfer time
+ setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1e5);
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
+ ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "cpu");
+ }
+}
+
+// Test scheduler behavior for branched graph with known execution time of all nodes and permutes
+TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time)
+{
+ const int64_t NPU_ET = 5000;
+ setExecutor(GetParam());
+
+ // Prepare graph
+ ir::Model model;
+ auto graph(createBranchedGraph());
+ model.push(ir::SubgraphIndex{0}, graph);
+ OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
+ sub_op_idx(5);
+
+ // Set default execution and transfer time
+ setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1000);
+ setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul", "FullyConnected"},
+ {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
+
+ // Test 1
+ // Expected behaviour: for dataflow and linear executors scheduler assigns fastest backend to all
+ // nodes, in case of parallel executor scheduler assigns different backends to branches.
+ {
+ // Reduce execution time
+ ExecTime et(_mock_backends);
+ setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, NPU_ET);
+ setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, NPU_ET);
+ setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, NPU_ET);
+ setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET);
+ setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET + 1000);
+ setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET + 1000);
+ et.storeOperationsExecTime();
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+
+ std::string branch1_expected_backend("npu"), branch2_expected_backend("npu");
+ if (GetParam() == PARALLEL)
+ {
+ branch1_expected_backend =
+ br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu";
+ branch2_expected_backend = branch1_expected_backend == "npu" ? "gpu" : "npu";
+ }
+
+ ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), branch1_expected_backend);
+ ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), branch1_expected_backend);
+ ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), branch2_expected_backend);
+ ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), branch2_expected_backend);
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
+ }
+
+ // Test 2
+ // Expected behaviour: scheduler assigns single backend to all nodes
+ {
+ // Increase execution time for GPU backend
+ ExecTime et(_mock_backends);
+ /* for parallel executor: set a time, that is larger than sum_of_other_branches_nodes_cnt *
+ * npu_exec_time so that npu is prefered: the ith branch will wait for npu until it finishes the
+ * [0;i-1] branches nodes in DFS order. In each branch it goes deep intul doesn't encounter
+ * branching or scheduler assigns another backend to a node*/
+ setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1);
+ setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1);
+ et.storeOperationsExecTime();
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
+ }
+}
+
+// Test scheduler behavior for branched graph and enabled profiling mode
+TEST_F(HESchedulerTest, branched_graph_profiling_mode)
+{
+ const int ET = 1e5;
+
+ // Turn on profiling mode
+ setProfilingMode(true);
+ setExecutor(DATAFLOW);
+
+ // Prepare graph
+ ir::Model model;
+ auto graph(createBranchedGraph());
+ model.push(ir::SubgraphIndex{0}, graph);
+ OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
+ sub_op_idx(5);
+
+ // Test 1
+ // Expected behaviour: scheduler assigns backends to nodes with unknown execution time
+ {
+ // Set execution time for all backends/nodes except for cpu/Sub, npu/Mul, gpu/FC
+ ExecTime et(_mock_backends);
+ setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _cpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
+ setOperationExecTime(et, _cpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _gpu_backend, "Add", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
+ setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, ET);
+ et.storeOperationsExecTime();
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
+ ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "gpu");
+ ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "gpu");
+ ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
+ }
+
+ // Test 2
+ // Expected behaviour: scheduler shuffling backends, so different backends are assigned to
+ // neighbor nodes
+ {
+ // Set execution time for rest backends/nodes (cpu/Sub, npu/Mul, gpu/FC)
+ ExecTime et(_mock_backends);
+ setOperationExecTime(et, _cpu_backend, "Sub", false, OPERATION_SIZE, ET);
+ setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
+ setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
+ et.storeOperationsExecTime();
+
+ // Test scheduler
+ auto coptions = *onert::compiler::CompilerOptions::fromGlobalConfig();
+ auto scheduler = compiler::HEScheduler(_mock_backends, coptions);
+ const auto br = scheduler.schedule(*graph);
+ ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
+ br->getBackend(mul1_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
+ br->getBackend(fc1_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(mul1_op_idx)->config()->id(),
+ br->getBackend(mul2_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(fc1_op_idx)->config()->id(),
+ br->getBackend(fc2_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(mul2_op_idx)->config()->id(),
+ br->getBackend(sub_op_idx)->config()->id());
+ ASSERT_NE(br->getBackend(fc2_op_idx)->config()->id(),
+ br->getBackend(sub_op_idx)->config()->id());
+ }
+}
+
+// TODO: Add tests with unknown execution and permutation time
+
+} // unnamed namespace
diff --git a/runtime/onert/core/src/compiler/Linear.cc b/runtime/onert/core/src/compiler/Linear.cc
index 73ba96238..f85b8d1bd 100644
--- a/runtime/onert/core/src/compiler/Linear.cc
+++ b/runtime/onert/core/src/compiler/Linear.cc
@@ -14,15 +14,13 @@
* limitations under the License.
*/
-#include <algorithm>
-#include <sstream>
-
#include "Linear.h"
-#include "backend/IConfig.h"
-#include "backend/Backend.h"
+#include "../dumper/text/GraphDumper.h"
+
#include "util/logging.h"
-#include "dumper/text/GraphDumper.h"
+
+#include <sstream>
namespace onert
{
diff --git a/runtime/onert/core/src/compiler/LoweredGraph.cc b/runtime/onert/core/src/compiler/LoweredGraph.cc
index 999bffa7c..9e84753a7 100644
--- a/runtime/onert/core/src/compiler/LoweredGraph.cc
+++ b/runtime/onert/core/src/compiler/LoweredGraph.cc
@@ -16,24 +16,23 @@
#include "compiler/LoweredGraph.h"
-#include <assert.h>
-#include <algorithm>
-#include <sstream>
-#include "util/logging.h"
-#include "compiler/pass/ConstantInsertionPass.h"
-#include "compiler/pass/ConstantLoweringPass.h"
-#include "compiler/pass/PassRunner.h"
-#include "compiler/pass/PermutationOperationPass.h"
-#include "compiler/pass/PermutationInsertionPass.h"
-#include "compiler/pass/PermutationEliminationPass.h"
-#include "dumper/text/GraphDumper.h"
-#include "ir/verifier/Verifier.h"
+#include "HEScheduler.h"
+#include "ManualScheduler.h"
+#include "pass/ConstantInsertionPass.h"
+#include "pass/ConstantLoweringPass.h"
+#include "pass/PassRunner.h"
+#include "pass/PermutationEliminationPass.h"
+#include "pass/PermutationInsertionPass.h"
+#include "pass/PermutationOperationPass.h"
+#include "../dumper/text/GraphDumper.h"
+#include "../ir/verifier/Verifier.h"
+
#include "backend/Backend.h"
-#include "backend/IConfig.h"
#include "compiler/BackendResolver.h"
-#include "compiler/ManualScheduler.h"
-#include "compiler/HEScheduler.h"
-#include "util/TracingCtx.h"
+#include "util/logging.h"
+
+#include <cassert>
+#include <sstream>
namespace onert
{
@@ -42,7 +41,7 @@ namespace compiler
LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &options) : _graph{graph}
{
- lowerGraph(graph, options);
+ lowerGraph(options);
}
// TODO Design better class and constructor to represent parent_graph
@@ -50,18 +49,11 @@ LoweredGraph::LoweredGraph(const ir::Graph &parent_graph, const ir::Graph &graph
const CompilerOptions &options)
: _graph{graph}, _parent_graph{parent_graph}
{
- lowerGraph(graph, options);
+ lowerGraph(options);
}
-void LoweredGraph::lowerGraph(const ir::Graph &graph, const CompilerOptions &options)
+void LoweredGraph::lowerGraph(const CompilerOptions &options)
{
- // set tracing_ctx for copied graph
- if (options.tracing_ctx)
- {
- auto subgraph_index = options.tracing_ctx->getSubgraphIndex(&graph);
- options.tracing_ctx->setSubgraphIndex(&_graph, subgraph_index.value());
- }
-
// Build backend contexts
auto &backend_manager = BackendManager::get();
// Create contexts for other backends
diff --git a/runtime/onert/core/src/compiler/ShapeValidator.cc b/runtime/onert/core/src/compiler/ShapeValidator.cc
index 1c7000986..8c6421744 100644
--- a/runtime/onert/core/src/compiler/ShapeValidator.cc
+++ b/runtime/onert/core/src/compiler/ShapeValidator.cc
@@ -34,77 +34,72 @@ namespace onert
namespace compiler
{
-ShapeValidator::ShapeValidator(const ir::Graph &graph)
- : _graph{graph}, _ctx{graph.operands()}, _current_layout{ir::Layout::UNKNOWN}
-{
-}
+ShapeValidator::ShapeValidator(const ir::Graph &graph) : _graph{graph} {}
void ShapeValidator::checkUnaryOp(const ir::Operation &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
// Check if I/O shapes match
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+ OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape());
}
void ShapeValidator::operator()()
{
- // There is no reason for each subgraph to have subgraphs since compiler has subgraphs when
- // creating Compiler
- assert(_graph.subgraphs() == nullptr);
-
- _current_layout = _graph.layout();
-
_graph.operations().iterate(
[&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
}
void ShapeValidator::visit(const ir::operation::BatchMatMul &node)
{
+ const auto &operands = _graph.operands();
const auto lhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::LHS));
const auto rhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::RHS));
const auto out_index{node.getOutputs().at(0)};
- if (_ctx.at(out_index).info().isDynamic())
+ if (operands.at(out_index).info().isDynamic())
return;
- OP_REQUIRES(_ctx.at(lhs_index).shape().rank() <= 4);
- OP_REQUIRES(_ctx.at(rhs_index).shape().rank() <= 4);
- OP_REQUIRES(_ctx.at(lhs_index).shape().rank() >= 2);
- OP_REQUIRES(_ctx.at(rhs_index).shape().rank() >= 2);
+ OP_REQUIRES(operands.at(lhs_index).shape().rank() <= 4);
+ OP_REQUIRES(operands.at(rhs_index).shape().rank() <= 4);
+ OP_REQUIRES(operands.at(lhs_index).shape().rank() >= 2);
+ OP_REQUIRES(operands.at(rhs_index).shape().rank() >= 2);
}
void ShapeValidator::visit(const ir::operation::BatchToSpaceND &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
- const auto frontend_layout = _current_layout;
- const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
+ const auto frontend_layout = _graph.layout();
+ const auto input_shape = operands.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout);
// All requirement as per NNAPI specification.
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(block_size_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(block_size_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2);
+ OP_REQUIRES(operands.at(block_size_index).shape().dim(0) == 2);
if (node.getInputs().size() != 2)
{
const auto crops_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::CROPS_DATA)};
- OP_REQUIRES(_ctx.at(crops_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(crops_index).shape().dim(0) == (_ctx.at(ifm_index).shape().rank() - 2));
- OP_REQUIRES(_ctx.at(crops_index).shape().dim(1) == 2);
+ OP_REQUIRES(operands.at(crops_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(crops_index).shape().dim(0) ==
+ (operands.at(ifm_index).shape().rank() - 2));
+ OP_REQUIRES(operands.at(crops_index).shape().dim(1) == 2);
}
OP_REQUIRES(input_shape.C == output_shape.C);
@@ -112,8 +107,9 @@ void ShapeValidator::visit(const ir::operation::BatchToSpaceND &node)
void ShapeValidator::visit(const ir::operation::BCQFullyConnected &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)};
@@ -125,16 +121,16 @@ void ShapeValidator::visit(const ir::operation::BCQFullyConnected &node)
node.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
// const auto bias_index{node.getInputs().at(ir::operation::BCQFullyConnected::Input::BIAS)};
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(weight_scales_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(weight_binary_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(weight_cluster_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(weight_scales_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(weight_binary_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(weight_cluster_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(ifm_index).shape().dim(1) == _ctx.at(ofm_index).shape().dim(1));
+ OP_REQUIRES(operands.at(ifm_index).shape().dim(1) == operands.at(ofm_index).shape().dim(1));
- OP_REQUIRES(_ctx.at(weight_cluster_index).shape().dim(0) > 0);
- OP_REQUIRES(_ctx.at(weight_cluster_index).shape().dim(1) == 2);
+ OP_REQUIRES(operands.at(weight_cluster_index).shape().dim(0) > 0);
+ OP_REQUIRES(operands.at(weight_cluster_index).shape().dim(1) == 2);
// more shape validation will be done inside kernel.
@@ -143,8 +139,9 @@ void ShapeValidator::visit(const ir::operation::BCQFullyConnected &node)
void ShapeValidator::visit(const ir::operation::BCQGather &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto indices_index{node.getInputs().at(ir::operation::BCQGather::Input::INDICES)};
@@ -153,13 +150,14 @@ void ShapeValidator::visit(const ir::operation::BCQGather &node)
const auto input_clusters_index{
node.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)};
- OP_REQUIRES(_ctx.at(indices_index).shape().rank() <= 2); // TODO : support rank up to 4 or more
- OP_REQUIRES(_ctx.at(input_binary_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(input_scales_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(input_clusters_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(indices_index).shape().rank() <=
+ 2); // TODO : support rank up to 4 or more
+ OP_REQUIRES(operands.at(input_binary_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(input_scales_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(input_clusters_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(input_clusters_index).shape().dim(0) > 0);
- OP_REQUIRES(_ctx.at(input_clusters_index).shape().dim(1) == 2);
+ OP_REQUIRES(operands.at(input_clusters_index).shape().dim(0) > 0);
+ OP_REQUIRES(operands.at(input_clusters_index).shape().dim(1) == 2);
// more shape validation will be done inside kernel.
}
@@ -171,62 +169,67 @@ void ShapeValidator::visit(const ir::operation::Comparison &)
void ShapeValidator::visit(const ir::operation::Softmax &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(0)};
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
+ OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank());
}
void ShapeValidator::visit(const ir::operation::InstanceNorm &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ifm_index).shape() == _ctx.at(ofm_index).shape());
- OP_REQUIRES(_ctx.at(gamma_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(beta_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ifm_index).shape() == operands.at(ofm_index).shape());
+ OP_REQUIRES(operands.at(gamma_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(beta_index).shape().rank() == 1);
}
void ShapeValidator::visit(const ir::operation::Pool2D &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
}
void ShapeValidator::visit(const ir::operation::Permute &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(0)};
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
+ OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank());
}
void ShapeValidator::visit(const ir::operation::Reduce &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
- const auto input_shape = _ctx.at(input_index).shape();
- const auto output_shape = _ctx.at(output_index).shape();
+ const auto input_shape = operands.at(input_index).shape();
+ const auto output_shape = operands.at(output_index).shape();
OP_REQUIRES(input_shape.rank() <= 4);
OP_REQUIRES(output_shape.rank() <= input_shape.rank());
@@ -266,18 +269,20 @@ void ShapeValidator::visit(const ir::operation::Reduce &node)
void ShapeValidator::visit(const ir::operation::Transpose &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
const auto perm_index{node.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
- const auto &output_shape = _ctx.at(output_index).shape();
- const auto &input_shape = _ctx.at(input_index).shape();
+ const auto &output_shape = operands.at(output_index).shape();
+ const auto &input_shape = operands.at(input_index).shape();
- OP_REQUIRES(_ctx.at(perm_index).shape().num_elements() == 0 ||
- input_shape.rank() == static_cast<int>(_ctx.at(perm_index).shape().num_elements()));
+ OP_REQUIRES(operands.at(perm_index).shape().num_elements() == 0 ||
+ input_shape.rank() ==
+ static_cast<int>(operands.at(perm_index).shape().num_elements()));
OP_REQUIRES(input_shape.rank() == output_shape.rank());
}
@@ -285,8 +290,9 @@ void ShapeValidator::visit(const ir::operation::RNN &node)
{
// NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn
// TODO Support dynamic rnn
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto hidden_state_out_index{
@@ -299,35 +305,36 @@ void ShapeValidator::visit(const ir::operation::RNN &node)
const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
- const auto batch_size = _ctx.at(output_index).shape().dim(0);
- const auto num_units = _ctx.at(output_index).shape().dim(1);
-
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 2 &&
- _ctx.at(hidden_state_out_index).shape().rank() == 2 &&
- _ctx.at(input_index).shape().rank() == 2 &&
- _ctx.at(weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_weights_index).shape().rank() == 2 &&
- _ctx.at(hidden_state_in_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(bias_index).shape().rank() == 1);
-
- OP_REQUIRES(batch_size == _ctx.at(input_index).shape().dim(0) &&
- batch_size == _ctx.at(hidden_state_in_index).shape().dim(0) &&
- batch_size == _ctx.at(hidden_state_out_index).shape().dim(0));
- OP_REQUIRES(_ctx.at(input_index).shape().dim(1) == _ctx.at(weights_index).shape().dim(1));
-
- OP_REQUIRES(num_units == _ctx.at(weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_weights_index).shape().dim(0) &&
- num_units == _ctx.at(bias_index).shape().dim(0));
- OP_REQUIRES(num_units == _ctx.at(output_index).shape().dim(1) &&
- num_units == _ctx.at(recurrent_weights_index).shape().dim(1) &&
- num_units == _ctx.at(hidden_state_in_index).shape().dim(1) &&
- num_units == _ctx.at(hidden_state_out_index).shape().dim(1));
+ const auto batch_size = operands.at(output_index).shape().dim(0);
+ const auto num_units = operands.at(output_index).shape().dim(1);
+
+ OP_REQUIRES(operands.at(output_index).shape().rank() == 2 &&
+ operands.at(hidden_state_out_index).shape().rank() == 2 &&
+ operands.at(input_index).shape().rank() == 2 &&
+ operands.at(weights_index).shape().rank() == 2 &&
+ operands.at(recurrent_weights_index).shape().rank() == 2 &&
+ operands.at(hidden_state_in_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(bias_index).shape().rank() == 1);
+
+ OP_REQUIRES(batch_size == operands.at(input_index).shape().dim(0) &&
+ batch_size == operands.at(hidden_state_in_index).shape().dim(0) &&
+ batch_size == operands.at(hidden_state_out_index).shape().dim(0));
+ OP_REQUIRES(operands.at(input_index).shape().dim(1) == operands.at(weights_index).shape().dim(1));
+
+ OP_REQUIRES(num_units == operands.at(weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_weights_index).shape().dim(0) &&
+ num_units == operands.at(bias_index).shape().dim(0));
+ OP_REQUIRES(num_units == operands.at(output_index).shape().dim(1) &&
+ num_units == operands.at(recurrent_weights_index).shape().dim(1) &&
+ num_units == operands.at(hidden_state_in_index).shape().dim(1) &&
+ num_units == operands.at(hidden_state_out_index).shape().dim(1));
}
void ShapeValidator::visit(const ir::operation::SpaceToBatchND &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
@@ -335,39 +342,40 @@ void ShapeValidator::visit(const ir::operation::SpaceToBatchND &node)
node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- const auto frontend_layout = _current_layout;
- const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
+ const auto frontend_layout = _graph.layout();
+ const auto input_shape = operands.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout);
// All requirement as per NNAPI specification.
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(block_size_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(paddings_index).shape().rank() == 2);
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(block_size_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(paddings_index).shape().rank() == 2);
- OP_REQUIRES(_ctx.at(block_size_index).shape().dim(0) == 2);
- OP_REQUIRES(_ctx.at(paddings_index).shape().dim(0) == 2);
- OP_REQUIRES(_ctx.at(paddings_index).shape().dim(1) == 2);
+ OP_REQUIRES(operands.at(block_size_index).shape().dim(0) == 2);
+ OP_REQUIRES(operands.at(paddings_index).shape().dim(0) == 2);
+ OP_REQUIRES(operands.at(paddings_index).shape().dim(1) == 2);
OP_REQUIRES(input_shape.C == output_shape.C);
}
void ShapeValidator::visit(const ir::operation::SpaceToDepth &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
- const auto frontend_layout = _current_layout;
- const auto input_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
- const auto output_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
+ const auto frontend_layout = _graph.layout();
+ const auto input_shape = operands.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto output_shape = operands.at(ofm_index).shape().asFeature(frontend_layout);
const auto block_size = node.param().block_size;
// All assertions as per NNAPI specification.
- OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ifm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4);
OP_REQUIRES((input_shape.H % block_size == 0) && (input_shape.W % block_size == 0));
OP_REQUIRES(input_shape.N == output_shape.N);
OP_REQUIRES(input_shape.C * block_size * block_size == output_shape.C);
@@ -382,29 +390,31 @@ void ShapeValidator::visit(const ir::operation::ElementwiseBinary &)
void ShapeValidator::visit(const ir::operation::ElementwiseUnary &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+ OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape());
}
void ShapeValidator::visit(const ir::operation::EmbeddingLookup &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- const auto &output_obj = _ctx.at(output_index);
- const auto &lookups_obj = _ctx.at(lookups_index);
- const auto &values_obj = _ctx.at(values_index);
+ const auto &output_obj = operands.at(output_index);
+ const auto &lookups_obj = operands.at(lookups_index);
+ const auto &values_obj = operands.at(values_index);
// Verify operand here, not at SimpleEmbeddingLookup::configure() to avoid acl's modifying
// TensorShape sometimes(Issue: https://github.sec.samsung.net/STAR/nnfw/issues/729)
{
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto &output_shape = output_obj.shape();
@@ -427,26 +437,28 @@ void ShapeValidator::visit(const ir::operation::EmbeddingLookup &node)
void ShapeValidator::visit(const ir::operation::ExpandDims &node)
{
+ const auto &operands = _graph.operands();
const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
- if (_ctx.at(axis_index).info().isDynamic())
+ if (operands.at(axis_index).info().isDynamic())
return;
- OP_REQUIRES(_ctx.at(axis_index).shape().rank() <= 1);
+ OP_REQUIRES(operands.at(axis_index).shape().rank() <= 1);
}
void ShapeValidator::visit(const ir::operation::HashtableLookup &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)};
const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
- const auto &output_obj = _ctx.at(output_index);
- const auto &lookups_obj = _ctx.at(lookups_index);
- const auto &keys_obj = _ctx.at(keys_index);
- const auto &values_obj = _ctx.at(values_index);
+ const auto &output_obj = operands.at(output_index);
+ const auto &lookups_obj = operands.at(lookups_index);
+ const auto &keys_obj = operands.at(keys_index);
+ const auto &values_obj = operands.at(values_index);
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto &output_shape = output_obj.shape();
@@ -464,28 +476,30 @@ void ShapeValidator::visit(const ir::operation::HashtableLookup &node)
void ShapeValidator::visit(const ir::operation::TransposeConv &node)
{
// shape check
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
// Only 4D tensors are supported
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ifm_index).shape().rank());
- OP_REQUIRES(_ctx.at(ofm_index).shape().rank() == _ctx.at(ker_index).shape().rank());
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == operands.at(ifm_index).shape().rank());
+ OP_REQUIRES(operands.at(ofm_index).shape().rank() == operands.at(ker_index).shape().rank());
- const auto frontend_layout = _current_layout;
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(frontend_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(frontend_layout);
+ const auto frontend_layout = _graph.layout();
+ const auto ofm_shape = operands.at(ofm_index).shape().asFeature(frontend_layout);
+ const auto ifm_shape = operands.at(ifm_index).shape().asFeature(frontend_layout);
// The kernel has only IHWO layout on frontend
// So ker_shape is treated here below
// I -> N
// H -> H
// W -> W
// O -> C
- const auto ker_shape = _ctx.at(ker_index).shape().asFeature(ir::Layout::NHWC);
+ const auto ker_shape = operands.at(ker_index).shape().asFeature(ir::Layout::NHWC);
OP_REQUIRES(ifm_shape.N == ofm_shape.N);
OP_REQUIRES(ifm_shape.C == ker_shape.C);
@@ -494,16 +508,17 @@ void ShapeValidator::visit(const ir::operation::TransposeConv &node)
void ShapeValidator::visit(const ir::operation::Gather &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
- const auto ifm_shape = _ctx.at(ifm_index).shape();
- const auto indices_shape = _ctx.at(indices_index).shape();
- const auto ofm_shape = _ctx.at(ofm_index).shape();
+ const auto ifm_shape = operands.at(ifm_index).shape();
+ const auto indices_shape = operands.at(indices_index).shape();
+ const auto ofm_shape = operands.at(ofm_index).shape();
OP_REQUIRES(ifm_shape.rank() <= 4);
OP_REQUIRES(indices_shape.rank() <= 3);
@@ -512,21 +527,22 @@ void ShapeValidator::visit(const ir::operation::Gather &node)
void ShapeValidator::visit(const ir::operation::DepthToSpace &node)
{
+ const auto &operands = _graph.operands();
int32_t block_size = node.param().block_size;
// shape check
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
- const auto frontend_layout = _current_layout;
- const auto output_shape = _ctx.at(output_index).shape().asFeature(frontend_layout);
- const auto input_shape = _ctx.at(input_index).shape().asFeature(frontend_layout);
+ const auto frontend_layout = _graph.layout();
+ const auto output_shape = operands.at(output_index).shape().asFeature(frontend_layout);
+ const auto input_shape = operands.at(input_index).shape().asFeature(frontend_layout);
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(input_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(output_index).shape().rank() == 4);
{
OP_REQUIRES(output_shape.N == input_shape.N);
@@ -539,22 +555,23 @@ void ShapeValidator::visit(const ir::operation::DepthToSpace &node)
void ShapeValidator::visit(const ir::operation::Pack &node)
{
+ const auto &operands = _graph.operands();
const auto axis{node.param().axis};
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
// shape check
- const auto &output_shape = _ctx.at(output_index).shape();
+ const auto &output_shape = operands.at(output_index).shape();
const auto output_rank = static_cast<int32_t>(output_shape.rank());
const auto input1_index{node.getInputs().at(0)};
- const auto input_shape = _ctx.at(input1_index).shape();
+ const auto input_shape = operands.at(input1_index).shape();
OP_REQUIRES(axis >= -output_rank && axis < output_rank);
for (const auto &index : node.getInputs())
{
- OP_REQUIRES(input_shape == _ctx.at(index).shape());
+ OP_REQUIRES(input_shape == operands.at(index).shape());
}
}
@@ -562,8 +579,9 @@ void ShapeValidator::visit(const ir::operation::LSTM &node)
{
// NOTE This validation is for static rnn(non-dynamic shape), but not for dynamic rnn
// TODO Support dynamic rnn
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto scratch_buffer_index{
@@ -611,91 +629,96 @@ void ShapeValidator::visit(const ir::operation::LSTM &node)
node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
- for (int i = 0; i < _ctx.at(input_index).shape().rank() - 1; ++i)
+ OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank());
+ for (int i = 0; i < operands.at(input_index).shape().rank() - 1; ++i)
{
- OP_REQUIRES(_ctx.at(input_index).shape().dim(i) == _ctx.at(output_index).shape().dim(i));
+ OP_REQUIRES(operands.at(input_index).shape().dim(i) ==
+ operands.at(output_index).shape().dim(i));
}
- OP_REQUIRES(
- (_ctx.at(output_index).shape().rank() == 2 || _ctx.at(output_index).shape().rank() == 3) &&
- (_ctx.at(input_index).shape().rank() == 2 || _ctx.at(input_index).shape().rank() == 3) &&
- (!_ctx.exist(input_to_input_weights_index) ||
- _ctx.at(input_to_input_weights_index).shape().rank() == 2) &&
- _ctx.at(input_to_forget_weights_index).shape().rank() == 2 &&
- _ctx.at(input_to_cell_weights_index).shape().rank() == 2 &&
- _ctx.at(input_to_output_weights_index).shape().rank() == 2 &&
- (!_ctx.exist(recurrent_to_input_weights_index) ||
- _ctx.at(recurrent_to_input_weights_index).shape().rank() == 2) &&
- _ctx.at(recurrent_to_forget_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_cell_weights_index).shape().rank() == 2 &&
- _ctx.at(recurrent_to_output_weights_index).shape().rank() == 2 &&
- (!_ctx.exist(projection_weights_index) ||
- _ctx.at(projection_weights_index).shape().rank() == 2) &&
- _ctx.at(output_state_in_index).shape().rank() == 2 &&
- _ctx.at(cell_state_in_index).shape().rank() == 2);
-
- OP_REQUIRES(
- (!_ctx.exist(cell_to_input_weights_index) ||
- _ctx.at(cell_to_input_weights_index).shape().rank() == 1) &&
- (!_ctx.exist(cell_to_forget_weights_index) ||
- _ctx.at(cell_to_forget_weights_index).shape().rank() == 1) &&
- (!_ctx.exist(cell_to_output_weights_index) ||
- _ctx.at(cell_to_output_weights_index).shape().rank() == 1) &&
- (!_ctx.exist(input_gate_bias_index) || _ctx.at(input_gate_bias_index).shape().rank() == 1) &&
- _ctx.at(forget_gate_bias_index).shape().rank() == 1 &&
- _ctx.at(cell_bias_index).shape().rank() == 1 &&
- _ctx.at(output_gate_bias_index).shape().rank() == 1 &&
- (!_ctx.exist(projection_bias_index) || _ctx.at(projection_bias_index).shape().rank() == 1));
+ OP_REQUIRES((operands.at(output_index).shape().rank() == 2 ||
+ operands.at(output_index).shape().rank() == 3) &&
+ (operands.at(input_index).shape().rank() == 2 ||
+ operands.at(input_index).shape().rank() == 3) &&
+ (!operands.exist(input_to_input_weights_index) ||
+ operands.at(input_to_input_weights_index).shape().rank() == 2) &&
+ operands.at(input_to_forget_weights_index).shape().rank() == 2 &&
+ operands.at(input_to_cell_weights_index).shape().rank() == 2 &&
+ operands.at(input_to_output_weights_index).shape().rank() == 2 &&
+ (!operands.exist(recurrent_to_input_weights_index) ||
+ operands.at(recurrent_to_input_weights_index).shape().rank() == 2) &&
+ operands.at(recurrent_to_forget_weights_index).shape().rank() == 2 &&
+ operands.at(recurrent_to_cell_weights_index).shape().rank() == 2 &&
+ operands.at(recurrent_to_output_weights_index).shape().rank() == 2 &&
+ (!operands.exist(projection_weights_index) ||
+ operands.at(projection_weights_index).shape().rank() == 2) &&
+ operands.at(output_state_in_index).shape().rank() == 2 &&
+ operands.at(cell_state_in_index).shape().rank() == 2);
+
+ OP_REQUIRES((!operands.exist(cell_to_input_weights_index) ||
+ operands.at(cell_to_input_weights_index).shape().rank() == 1) &&
+ (!operands.exist(cell_to_forget_weights_index) ||
+ operands.at(cell_to_forget_weights_index).shape().rank() == 1) &&
+ (!operands.exist(cell_to_output_weights_index) ||
+ operands.at(cell_to_output_weights_index).shape().rank() == 1) &&
+ (!operands.exist(input_gate_bias_index) ||
+ operands.at(input_gate_bias_index).shape().rank() == 1) &&
+ operands.at(forget_gate_bias_index).shape().rank() == 1 &&
+ operands.at(cell_bias_index).shape().rank() == 1 &&
+ operands.at(output_gate_bias_index).shape().rank() == 1 &&
+ (!operands.exist(projection_bias_index) ||
+ operands.at(projection_bias_index).shape().rank() == 1));
// CIFG assertion
- OP_REQUIRES(
- ((!_ctx.exist(input_to_input_weights_index) ||
- (_ctx.at(input_to_input_weights_index).shape().dim(0) == 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) == 0)) &&
- (!_ctx.exist(recurrent_to_input_weights_index) ||
- (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) == 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) == 0)) &&
- (!_ctx.exist(input_gate_bias_index) || _ctx.at(input_gate_bias_index).shape().dim(0) == 0) &&
- (!_ctx.exist(cell_to_input_weights_index) ||
- _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0)) ||
- ((_ctx.exist(input_to_input_weights_index) &&
- (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0)) &&
- (_ctx.exist(recurrent_to_input_weights_index) &&
- (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0)) &&
- (_ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0) != 0)));
+ OP_REQUIRES(((!operands.exist(input_to_input_weights_index) ||
+ (operands.at(input_to_input_weights_index).shape().dim(0) == 0 &&
+ operands.at(input_to_input_weights_index).shape().dim(1) == 0)) &&
+ (!operands.exist(recurrent_to_input_weights_index) ||
+ (operands.at(recurrent_to_input_weights_index).shape().dim(0) == 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) == 0)) &&
+ (!operands.exist(input_gate_bias_index) ||
+ operands.at(input_gate_bias_index).shape().dim(0) == 0) &&
+ (!operands.exist(cell_to_input_weights_index) ||
+ operands.at(cell_to_input_weights_index).shape().dim(0) == 0)) ||
+ ((operands.exist(input_to_input_weights_index) &&
+ (operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(input_to_input_weights_index).shape().dim(1) != 0)) &&
+ (operands.exist(recurrent_to_input_weights_index) &&
+ (operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0)) &&
+ (operands.exist(input_gate_bias_index) &&
+ operands.at(input_gate_bias_index).shape().dim(0) != 0)));
// Peephole assertion
- OP_REQUIRES(((!_ctx.exist(cell_to_forget_weights_index) ||
- _ctx.at(cell_to_forget_weights_index).shape().dim(0) == 0) &&
- (!_ctx.exist(cell_to_output_weights_index) ||
- _ctx.at(cell_to_output_weights_index).shape().dim(0) == 0)) ||
- ((_ctx.exist(cell_to_forget_weights_index) &&
- _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0) &&
- (_ctx.exist(cell_to_output_weights_index) &&
- _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0)));
-
- bool has_input_to_input_weights = _ctx.exist(input_to_input_weights_index) &&
- (_ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0);
+ OP_REQUIRES(((!operands.exist(cell_to_forget_weights_index) ||
+ operands.at(cell_to_forget_weights_index).shape().dim(0) == 0) &&
+ (!operands.exist(cell_to_output_weights_index) ||
+ operands.at(cell_to_output_weights_index).shape().dim(0) == 0)) ||
+ ((operands.exist(cell_to_forget_weights_index) &&
+ operands.at(cell_to_forget_weights_index).shape().dim(0) != 0) &&
+ (operands.exist(cell_to_output_weights_index) &&
+ operands.at(cell_to_output_weights_index).shape().dim(0) != 0)));
+
+ bool has_input_to_input_weights =
+ operands.exist(input_to_input_weights_index) &&
+ (operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(input_to_input_weights_index).shape().dim(1) != 0);
bool has_recurrent_to_input_weights =
- _ctx.exist(recurrent_to_input_weights_index) &&
- (_ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
+ operands.exist(recurrent_to_input_weights_index) &&
+ (operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0);
bool has_input_gate_bias =
- _ctx.exist(input_gate_bias_index) && _ctx.at(input_gate_bias_index).shape().dim(0) != 0;
- bool has_cell_to_input_weights = _ctx.exist(cell_to_input_weights_index) &&
- _ctx.at(cell_to_input_weights_index).shape().dim(0) != 0;
- bool has_cell_to_forget_weights = _ctx.exist(cell_to_forget_weights_index) &&
- _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
- bool has_cell_to_output_weights = _ctx.exist(cell_to_output_weights_index) &&
- _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
- bool has_projection_weights = _ctx.exist(projection_weights_index) &&
- (_ctx.at(projection_weights_index).shape().dim(0) != 0 &&
- _ctx.at(projection_weights_index).shape().dim(1) != 0);
+ operands.exist(input_gate_bias_index) && operands.at(input_gate_bias_index).shape().dim(0) != 0;
+ bool has_cell_to_input_weights = operands.exist(cell_to_input_weights_index) &&
+ operands.at(cell_to_input_weights_index).shape().dim(0) != 0;
+ bool has_cell_to_forget_weights = operands.exist(cell_to_forget_weights_index) &&
+ operands.at(cell_to_forget_weights_index).shape().dim(0) != 0;
+ bool has_cell_to_output_weights = operands.exist(cell_to_output_weights_index) &&
+ operands.at(cell_to_output_weights_index).shape().dim(0) != 0;
+ bool has_projection_weights = operands.exist(projection_weights_index) &&
+ (operands.at(projection_weights_index).shape().dim(0) != 0 &&
+ operands.at(projection_weights_index).shape().dim(1) != 0);
bool has_projection_bias =
- _ctx.exist(projection_bias_index) && _ctx.at(projection_bias_index).shape().dim(0) != 0;
+ operands.exist(projection_bias_index) && operands.at(projection_bias_index).shape().dim(0) != 0;
// NOTE The cell_to_input_weights do not exist in non-peephole although regular LSTM(non-CIFG).
// true: no CIFG
@@ -710,46 +733,48 @@ void ShapeValidator::visit(const ir::operation::LSTM &node)
// NOTE The projection weights may have data but the projection bias may not.
bool has_projection_param = has_projection_weights;
- const auto batch_size = (_ctx.at(input_index).shape().rank() == 3 && node.param().time_major)
- ? _ctx.at(input_index).shape().dim(1)
- : _ctx.at(input_index).shape().dim(0);
- OP_REQUIRES(batch_size == _ctx.at(output_state_in_index).shape().dim(0) &&
- batch_size == _ctx.at(cell_state_in_index).shape().dim(0));
-
- const auto input_size = _ctx.at(input_index).shape().dim(_ctx.at(input_index).shape().rank() - 1);
- OP_REQUIRES(input_size == _ctx.at(input_to_forget_weights_index).shape().dim(1) &&
- input_size == _ctx.at(input_to_cell_weights_index).shape().dim(1) &&
- input_size == _ctx.at(input_to_output_weights_index).shape().dim(1));
-
- const auto num_units = _ctx.at(input_to_output_weights_index).shape().dim(0);
- OP_REQUIRES(num_units == _ctx.at(input_to_cell_weights_index).shape().dim(0) &&
- num_units == _ctx.at(input_to_output_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_forget_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_cell_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_output_weights_index).shape().dim(0) &&
- num_units == _ctx.at(forget_gate_bias_index).shape().dim(0) &&
- num_units == _ctx.at(cell_bias_index).shape().dim(0) &&
- num_units == _ctx.at(output_gate_bias_index).shape().dim(0) &&
- num_units == _ctx.at(cell_state_in_index).shape().dim(1));
+ const auto batch_size = (operands.at(input_index).shape().rank() == 3 && node.param().time_major)
+ ? operands.at(input_index).shape().dim(1)
+ : operands.at(input_index).shape().dim(0);
+ OP_REQUIRES(batch_size == operands.at(output_state_in_index).shape().dim(0) &&
+ batch_size == operands.at(cell_state_in_index).shape().dim(0));
+
+ const auto input_size =
+ operands.at(input_index).shape().dim(operands.at(input_index).shape().rank() - 1);
+ OP_REQUIRES(input_size == operands.at(input_to_forget_weights_index).shape().dim(1) &&
+ input_size == operands.at(input_to_cell_weights_index).shape().dim(1) &&
+ input_size == operands.at(input_to_output_weights_index).shape().dim(1));
+
+ const auto num_units = operands.at(input_to_output_weights_index).shape().dim(0);
+ OP_REQUIRES(num_units == operands.at(input_to_cell_weights_index).shape().dim(0) &&
+ num_units == operands.at(input_to_output_weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_to_forget_weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_to_cell_weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_to_output_weights_index).shape().dim(0) &&
+ num_units == operands.at(forget_gate_bias_index).shape().dim(0) &&
+ num_units == operands.at(cell_bias_index).shape().dim(0) &&
+ num_units == operands.at(output_gate_bias_index).shape().dim(0) &&
+ num_units == operands.at(cell_state_in_index).shape().dim(1));
const auto output_size =
- _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1);
- OP_REQUIRES(output_size == _ctx.at(recurrent_to_forget_weights_index).shape().dim(1) &&
- output_size == _ctx.at(recurrent_to_cell_weights_index).shape().dim(1) &&
- output_size == _ctx.at(recurrent_to_output_weights_index).shape().dim(1) &&
- output_size == _ctx.at(output_state_in_index).shape().dim(1));
+ operands.at(output_index).shape().dim(operands.at(output_index).shape().rank() - 1);
+ OP_REQUIRES(output_size == operands.at(recurrent_to_forget_weights_index).shape().dim(1) &&
+ output_size == operands.at(recurrent_to_cell_weights_index).shape().dim(1) &&
+ output_size == operands.at(recurrent_to_output_weights_index).shape().dim(1) &&
+ output_size == operands.at(output_state_in_index).shape().dim(1));
if (has_cifg_param)
{
- OP_REQUIRES(input_size == _ctx.at(input_to_input_weights_index).shape().dim(1));
- OP_REQUIRES(num_units == _ctx.at(input_to_input_weights_index).shape().dim(0) &&
- num_units == _ctx.at(recurrent_to_input_weights_index).shape().dim(0) &&
- ((_ctx.exist(cell_to_input_weights_index) &&
- num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0)) ||
- (!_ctx.exist(cell_to_input_weights_index) ||
- _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0) /* non-peephole */) &&
- num_units == _ctx.at(input_gate_bias_index).shape().dim(0));
- OP_REQUIRES(output_size == _ctx.at(recurrent_to_input_weights_index).shape().dim(1));
+ OP_REQUIRES(input_size == operands.at(input_to_input_weights_index).shape().dim(1));
+ OP_REQUIRES(
+ num_units == operands.at(input_to_input_weights_index).shape().dim(0) &&
+ num_units == operands.at(recurrent_to_input_weights_index).shape().dim(0) &&
+ ((operands.exist(cell_to_input_weights_index) &&
+ num_units == operands.at(cell_to_input_weights_index).shape().dim(0)) ||
+ (!operands.exist(cell_to_input_weights_index) ||
+ operands.at(cell_to_input_weights_index).shape().dim(0) == 0) /* non-peephole */) &&
+ num_units == operands.at(input_gate_bias_index).shape().dim(0));
+ OP_REQUIRES(output_size == operands.at(recurrent_to_input_weights_index).shape().dim(1));
OP_REQUIRES(has_input_to_input_weights && has_recurrent_to_input_weights &&
has_input_gate_bias);
if (has_cell_to_input_weights)
@@ -757,64 +782,65 @@ void ShapeValidator::visit(const ir::operation::LSTM &node)
// NOTE The cell_to_input_weights exist only in case of non-CIFG and peephole.
OP_REQUIRES(has_peephole_param);
}
- if (_ctx.exist(scratch_buffer_index))
- OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 4);
+ if (operands.exist(scratch_buffer_index))
+ OP_REQUIRES(operands.at(scratch_buffer_index).shape().dim(1) == num_units * 4);
}
else
{
- if (_ctx.exist(scratch_buffer_index))
- OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().dim(1) == num_units * 3);
+ if (operands.exist(scratch_buffer_index))
+ OP_REQUIRES(operands.at(scratch_buffer_index).shape().dim(1) == num_units * 3);
}
if (has_peephole_param)
{
- OP_REQUIRES(num_units == _ctx.at(cell_to_forget_weights_index).shape().dim(0) &&
- num_units == _ctx.at(cell_to_output_weights_index).shape().dim(0) &&
- (num_units == _ctx.at(cell_to_input_weights_index).shape().dim(0) ||
- _ctx.at(cell_to_input_weights_index).shape().dim(0) == 0 /* CIFG */));
+ OP_REQUIRES(num_units == operands.at(cell_to_forget_weights_index).shape().dim(0) &&
+ num_units == operands.at(cell_to_output_weights_index).shape().dim(0) &&
+ (num_units == operands.at(cell_to_input_weights_index).shape().dim(0) ||
+ operands.at(cell_to_input_weights_index).shape().dim(0) == 0 /* CIFG */));
}
if (has_projection_param)
{
- OP_REQUIRES(num_units == _ctx.at(projection_weights_index).shape().dim(1));
- OP_REQUIRES(output_size == _ctx.at(projection_weights_index).shape().dim(0));
+ OP_REQUIRES(num_units == operands.at(projection_weights_index).shape().dim(1));
+ OP_REQUIRES(output_size == operands.at(projection_weights_index).shape().dim(0));
if (has_projection_bias)
{
- OP_REQUIRES(output_size == _ctx.at(projection_bias_index).shape().dim(0));
+ OP_REQUIRES(output_size == operands.at(projection_bias_index).shape().dim(0));
}
}
- if (_ctx.exist(scratch_buffer_index))
+ if (operands.exist(scratch_buffer_index))
{
- OP_REQUIRES(_ctx.at(scratch_buffer_index).shape().rank() == 2);
- OP_REQUIRES(batch_size == _ctx.at(scratch_buffer_index).shape().dim(0));
+ OP_REQUIRES(operands.at(scratch_buffer_index).shape().rank() == 2);
+ OP_REQUIRES(batch_size == operands.at(scratch_buffer_index).shape().dim(0));
}
- if (_ctx.exist(output_state_out_index))
+ if (operands.exist(output_state_out_index))
{
- OP_REQUIRES(_ctx.at(output_state_out_index).shape().rank() == 2);
- OP_REQUIRES(batch_size == _ctx.at(output_state_out_index).shape().dim(0));
- OP_REQUIRES(output_size == _ctx.at(output_state_out_index).shape().dim(1));
+ OP_REQUIRES(operands.at(output_state_out_index).shape().rank() == 2);
+ OP_REQUIRES(batch_size == operands.at(output_state_out_index).shape().dim(0));
+ OP_REQUIRES(output_size == operands.at(output_state_out_index).shape().dim(1));
}
- if (_ctx.exist(cell_state_out_index))
+ if (operands.exist(cell_state_out_index))
{
- OP_REQUIRES(_ctx.at(cell_state_out_index).shape().rank() == 2);
- OP_REQUIRES(batch_size == _ctx.at(cell_state_out_index).shape().dim(0));
- OP_REQUIRES(num_units == _ctx.at(cell_state_out_index).shape().dim(1));
+ OP_REQUIRES(operands.at(cell_state_out_index).shape().rank() == 2);
+ OP_REQUIRES(batch_size == operands.at(cell_state_out_index).shape().dim(0));
+ OP_REQUIRES(num_units == operands.at(cell_state_out_index).shape().dim(1));
}
}
void ShapeValidator::visit(const ir::operation::L2Normalization &node)
{
+ const auto &operands = _graph.operands();
const auto ofm_index{node.getOutputs().at(0)};
- if (_ctx.at(ofm_index).info().isDynamic())
+ if (operands.at(ofm_index).info().isDynamic())
return;
const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};
- auto ifm_shape = _ctx.at(ifm_index).shape();
- auto ofm_shape = _ctx.at(ofm_index).shape();
+ auto ifm_shape = operands.at(ifm_index).shape();
+ auto ofm_shape = operands.at(ofm_index).shape();
OP_REQUIRES(ifm_shape.rank() == ofm_shape.rank());
@@ -826,14 +852,15 @@ void ShapeValidator::visit(const ir::operation::L2Normalization &node)
void ShapeValidator::visit(const ir::operation::Unpack &node)
{
+ const auto &operands = _graph.operands();
const auto axis{node.param().axis};
const auto output_index{node.getInputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
- const auto &input_shape = _ctx.at(input_index).shape();
+ const auto &input_shape = operands.at(input_index).shape();
const auto input_rank = static_cast<int32_t>(input_shape.rank());
OP_REQUIRES(axis >= -input_rank && axis < input_rank);
@@ -841,22 +868,23 @@ void ShapeValidator::visit(const ir::operation::Unpack &node)
void ShapeValidator::visit(const ir::operation::Pad &node)
{
+ const auto &operands = _graph.operands();
const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
- OP_REQUIRES(_ctx.at(pad_index).typeInfo().type() == ir::DataType::INT32);
+ OP_REQUIRES(operands.at(pad_index).typeInfo().type() == ir::DataType::INT32);
const auto output_index{node.getInputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
- const auto &pad_shape = _ctx.at(pad_index).shape();
- const auto input_rank = static_cast<int32_t>(_ctx.at(input_index).shape().rank());
+ const auto &pad_shape = operands.at(pad_index).shape();
+ const auto input_rank = static_cast<int32_t>(operands.at(input_index).shape().rank());
OP_REQUIRES(pad_shape.rank() == 2);
OP_REQUIRES(pad_shape.dim(0) == input_rank);
OP_REQUIRES(pad_shape.dim(1) == 2);
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
+ OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank());
}
void ShapeValidator::visit(const ir::operation::Select &)
@@ -866,65 +894,70 @@ void ShapeValidator::visit(const ir::operation::Select &)
void ShapeValidator::visit(const ir::operation::StridedSlice &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
- OP_REQUIRES(_ctx.at(input_index).shape().rank() <= 4);
+ OP_REQUIRES(operands.at(input_index).shape().rank() <= 4);
}
void ShapeValidator::visit(const ir::operation::Split &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
const auto axis_index{node.getInputs().at(ir::operation::Split::Input::AXIS)};
const auto num_splits = node.param().num_splits;
- const auto input_rank = _ctx.at(input_index).shape().rank();
- auto axis = *reinterpret_cast<const int32_t *>(_ctx.at(axis_index).data()->base());
+ const auto input_rank = operands.at(input_index).shape().rank();
+ auto axis = *reinterpret_cast<const int32_t *>(operands.at(axis_index).data()->base());
axis = axis < 0 ? axis + input_rank : axis;
OP_REQUIRES(axis >= 0 && axis < input_rank);
- OP_REQUIRES(_ctx.at(input_index).shape().dim(axis) % num_splits == 0);
+ OP_REQUIRES(operands.at(input_index).shape().dim(axis) % num_splits == 0);
}
void ShapeValidator::visit(const ir::operation::Shape &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(0)};
UNUSED_RELEASE(input_index);
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(output_index).shape().rank() == 1);
}
void ShapeValidator::visit(const ir::operation::ResizeBilinear &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
{
return;
}
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == 4);
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(input_index).shape().rank() == 4);
+ OP_REQUIRES(operands.at(output_index).shape().rank() == 4);
}
void ShapeValidator::visit(const ir::operation::Reverse &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reverse::Input::INPUT)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
- OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+ OP_REQUIRES(operands.at(output_index).shape() == operands.at(input_index).shape());
}
void ShapeValidator::visit(const ir::operation::If &)
@@ -940,17 +973,18 @@ void ShapeValidator::visit(const ir::operation::While &)
void ShapeValidator::visit(const ir::operation::SquaredDifference &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
// Check for dimension constraints
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
- auto output_shape = _ctx.at(output_index).shape();
- auto lhs_shape = _ctx.at(lhs_index).shape();
- auto rhs_shape = _ctx.at(rhs_index).shape();
+ auto output_shape = operands.at(output_index).shape();
+ auto lhs_shape = operands.at(lhs_index).shape();
+ auto rhs_shape = operands.at(rhs_index).shape();
// Check for output rank
OP_REQUIRES(output_shape.rank() == std::max(lhs_shape.rank(), rhs_shape.rank()));
auto min_rank = std::min(lhs_shape.rank(), rhs_shape.rank());
@@ -982,36 +1016,40 @@ void ShapeValidator::visit(const ir::operation::SquaredDifference &node)
}
void ShapeValidator::visit(const ir::operation::Tile &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(0)};
const auto multiple_index{node.getInputs().at(1)};
- OP_REQUIRES(_ctx.at(multiple_index).shape().rank() == 1);
- OP_REQUIRES(_ctx.at(multiple_index).shape().dim(0) == _ctx.at(input_index).shape().rank());
- OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
+ OP_REQUIRES(operands.at(multiple_index).shape().rank() == 1);
+ OP_REQUIRES(operands.at(multiple_index).shape().dim(0) ==
+ operands.at(input_index).shape().rank());
+ OP_REQUIRES(operands.at(input_index).shape().rank() == operands.at(output_index).shape().rank());
}
void ShapeValidator::visit(const ir::operation::Range &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto start_index{node.getInputs().at(ir::operation::Range::Input::START)};
const auto limit_index{node.getInputs().at(ir::operation::Range::Input::LIMIT)};
const auto delta_index{node.getInputs().at(ir::operation::Range::Input::DELTA)};
// Check for dimension constraints
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
- OP_REQUIRES(_ctx.at(start_index).shape().rank() == 0);
- OP_REQUIRES(_ctx.at(limit_index).shape().rank() == 0);
- OP_REQUIRES(_ctx.at(delta_index).shape().rank() == 0);
+ OP_REQUIRES(operands.at(start_index).shape().rank() == 0);
+ OP_REQUIRES(operands.at(limit_index).shape().rank() == 0);
+ OP_REQUIRES(operands.at(delta_index).shape().rank() == 0);
}
void ShapeValidator::visit(const ir::operation::MatrixBandPart &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT)};
const auto num_lower_index{
@@ -1020,23 +1058,24 @@ void ShapeValidator::visit(const ir::operation::MatrixBandPart &node)
node.getInputs().at(ir::operation::MatrixBandPart::Input::NUM_UPPER_DIAG)};
// Check for dimension constraints
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
- OP_REQUIRES(_ctx.at(input_index).shape().rank() >= 2); // input must be more than 2 dim matrix
- OP_REQUIRES(_ctx.at(num_upper_index).shape().rank() == 0); // num_lower must be scalar
- OP_REQUIRES(_ctx.at(num_lower_index).shape().rank() == 0); // num_upper must be scalar
+ OP_REQUIRES(operands.at(input_index).shape().rank() >= 2); // input must be more than 2 dim matrix
+ OP_REQUIRES(operands.at(num_upper_index).shape().rank() == 0); // num_lower must be scalar
+ OP_REQUIRES(operands.at(num_lower_index).shape().rank() == 0); // num_upper must be scalar
}
void ShapeValidator::visit(const ir::operation::LogSoftmax &node)
{
+ const auto &operands = _graph.operands();
const auto output_index{node.getOutputs().at(0)};
- if (_ctx.at(output_index).info().isDynamic())
+ if (operands.at(output_index).info().isDynamic())
return;
const auto input_index{node.getInputs().at(0)};
- OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
+ OP_REQUIRES(operands.at(output_index).shape().rank() == operands.at(input_index).shape().rank());
}
} // namespace compiler
diff --git a/runtime/onert/core/src/compiler/ShapeValidator.h b/runtime/onert/core/src/compiler/ShapeValidator.h
index 763cf7ce3..a51e8adc0 100644
--- a/runtime/onert/core/src/compiler/ShapeValidator.h
+++ b/runtime/onert/core/src/compiler/ShapeValidator.h
@@ -39,8 +39,13 @@ class ShapeValidator : public ir::OperationVisitor
public:
ShapeValidator(void) = delete;
ShapeValidator(const ir::Graph &graph);
+ ShapeValidator(const ShapeValidator &) = delete;
+ ShapeValidator(ShapeValidator &&) = delete;
+ ~ShapeValidator() = default;
public:
+ ShapeValidator &operator=(const ShapeValidator &) = delete;
+ ShapeValidator &operator=(ShapeValidator &&) = delete;
void operator()();
public:
@@ -90,10 +95,7 @@ private:
void checkUnaryOp(const ir::Operation &node);
private:
- // TODO Remove _ctx field
const ir::Graph &_graph;
- const ir::Operands &_ctx;
- ir::Layout _current_layout;
};
} // namespace compiler
diff --git a/runtime/onert/core/src/compiler/StaticShapeInferer.cc b/runtime/onert/core/src/compiler/StaticShapeInferer.cc
index f2fee2c3c..485450560 100644
--- a/runtime/onert/core/src/compiler/StaticShapeInferer.cc
+++ b/runtime/onert/core/src/compiler/StaticShapeInferer.cc
@@ -19,62 +19,90 @@
#include "util/logging.h"
#include <sstream>
+#include <stdexcept>
namespace onert
{
namespace compiler
{
-
-void StaticShapeInferer::inferSubgraph(ir::SubgraphIndex subg_ind)
+void OperandObserver::updateShapes(const std::vector<ir::OperandInfo> &changed_operands_info,
+ bool unpredictable)
{
- StaticShapeInferer inferer(subg_ind, _lowered_subgs);
- auto &lgraph = _lowered_subgs.at(subg_ind);
- for (auto op_ind : lgraph->graph().topolSortOperations())
+ assert(changed_operands_info.size() == _operands.size());
+ for (size_t i = 0; i < changed_operands_info.size(); ++i)
{
- auto &op = lgraph->graph().operations().at(op_ind);
- bool has_dynamic_tensor = inferer.infer(op);
- lgraph->setHasDynamicTensor(op_ind, has_dynamic_tensor);
+ const auto &changed_operand_info = changed_operands_info.at(i);
+ auto &operand = _operands.at(i);
+ // assert(changed_operand_info.typeInfo() == operand->typeInfo());
+ // assert(changed_operand_info.typeInfo() == operand->typeInfo());
+    // This error check may be replaced by an assertion if this function is called after the
+    // validation of models is completed.
+ if (changed_operand_info.typeInfo() != operand->typeInfo())
+ {
+ throw std::runtime_error("OperandObserver: The types of operands are mismatched");
+ }
+ if (!operand->info().isConstant() && (changed_operand_info.isDynamic() || unpredictable))
+ {
+ operand->info().setDynamic();
+ }
+ else
+ {
+ const auto &new_shape = changed_operands_info.at(i).shape();
+ operand->info().shape(new_shape);
+ }
}
}
-bool StaticShapeInferer::infer(const ir::Operation &op)
+void StaticShapeInferer::infer()
{
- bool has_dynamic_tensor = false;
-
- auto opcode = op.opcode();
-
- _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit()
-
- // IF: need shape inference for then, else
- // While: need shape inference for condition, body
- if (opcode == ir::OpCode::If || opcode == ir::OpCode::While)
- {
- op.accept(*this);
- }
- else
+ for (const auto &op_idx : _lowered_subg->graph().topolSortOperations())
{
- _return_has_dynamic_tensor = checkDynamicInput(op);
-
- if (_return_has_dynamic_tensor)
+ const auto &op = _lowered_subg->graph().operations().at(op_idx);
+ bool has_dynamic_tensor = false;
+ const auto opcode = op.opcode();
+ // IF: requires shape inference for then, else
+ // While: requires shape inference for condition, body
+ if (opcode == ir::OpCode::If || opcode == ir::OpCode::While)
{
- setDynamicOutput(op);
+ op.accept(*this);
}
else
{
- op.accept(*this);
+ has_dynamic_tensor = checkDynamicInput(op);
+ if (has_dynamic_tensor)
+ {
+ setDynamicOutput(op);
+ }
+ else
+ {
+ op.accept(*this);
+ }
}
+ has_dynamic_tensor = has_dynamic_tensor || checkDynamicOutput(op);
+ _lowered_subg->setHasDynamicTensor(op_idx, has_dynamic_tensor);
}
- has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor;
-
- return has_dynamic_tensor;
+ if (_controlflow_output_observer != nullptr)
+ {
+    // re-sizing output shapes of the controlflow operation branching to this subgraph
+ std::vector<ir::OperandInfo> outputs_info;
+ const auto &graph = _lowered_subg->graph();
+ const auto &outputs = graph.getOutputs();
+ for (size_t i = 0; i < outputs.size(); ++i)
+ {
+ const auto &operand_info = graph.operands().at(outputs.at(i)).info();
+ outputs_info.emplace_back(operand_info);
+ }
+ _controlflow_output_observer->updateShapes(outputs_info);
+ }
}
bool StaticShapeInferer::checkDynamicInput(const ir::Operation &op)
{
+ const auto &operands = _lowered_subg->graph().operands();
for (auto input_idx : op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED)
{
- if (_operands.at(input_idx).info().isDynamic())
+ if (operands.at(input_idx).info().isDynamic())
{
return true;
}
@@ -83,11 +111,25 @@ bool StaticShapeInferer::checkDynamicInput(const ir::Operation &op)
return false;
}
+bool StaticShapeInferer::checkDynamicOutput(const ir::Operation &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+ for (auto output_idx : op.getOutputs() | ir::Remove::UNDEFINED)
+ {
+ if (operands.at(output_idx).info().isDynamic())
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
void StaticShapeInferer::setDynamicOutput(const ir::Operation &op)
{
+ auto &operands = _lowered_subg->graph().operands();
for (auto output_idx : op.getOutputs() | ir::Remove::UNDEFINED)
{
- _operands.at(output_idx).info().setDynamic();
+ operands.at(output_idx).info().setDynamic();
}
}
@@ -95,11 +137,12 @@ void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
const ir::OperandIndex lhs_idx,
const ir::OperandIndex rhs_idx)
{
- const auto &lhs = _operands.at(lhs_idx);
- const auto &rhs = _operands.at(rhs_idx);
+ auto &operands = _lowered_subg->graph().operands();
+ const auto &lhs = operands.at(lhs_idx);
+ const auto &rhs = operands.at(rhs_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// re-sizing output shape
ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs.info().shape(), rhs.info().shape());
@@ -109,11 +152,12 @@ void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
void StaticShapeInferer::handleSimpleUnaryOp(const ir::Operation &op,
const ir::OperandIndex input_idx)
{
- const auto &input = _operands.at(input_idx);
+ auto &operands = _lowered_subg->graph().operands();
+ const auto &input = operands.at(input_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// re-sizing output shape
ir::Shape new_shape = input.info().shape();
@@ -136,36 +180,31 @@ void StaticShapeInferer::dump()
return sstream.str();
};
- for (const auto &pair : _lowered_subgs)
- {
- const auto index = pair.first;
- const auto &lowered_subg = pair.second;
- VERBOSE(StaticShapeInferer) << index << std::endl;
- lowered_subg->graph().operands().iterate(
- [&](const ir::OperandIndex &ind, const ir::Operand &operand) {
- VERBOSE(StaticShapeInferer)
- << " " << ind << ", " << (operand.info().isDynamic() ? "Dynamic" : "Static") << ", "
- << get_shape_str(operand.info().shape()) << std::endl;
- });
- }
+ _lowered_subg->graph().operands().iterate(
+ [&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+ VERBOSE(StaticShapeInferer) << " " << ind << ", "
+ << (operand.info().isDynamic() ? "Dynamic" : "Static") << ", "
+ << get_shape_str(operand.info().shape()) << std::endl;
+ });
}
void StaticShapeInferer::visit(const ir::operation::ArgMinMax &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto axis_idx{op.getInputs().at(ir::operation::ArgMinMax::Input::AXIS)};
- const auto &axis = _operands.at(axis_idx);
+ const auto &axis = operands.at(axis_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
if (!axis.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -181,27 +220,31 @@ void StaticShapeInferer::visit(const ir::operation::ArgMinMax &op)
void StaticShapeInferer::visit(const ir::operation::BatchMatMul &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto lhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::LHS);
const auto rhs_index = op.getInputs().at(ir::operation::BatchMatMul::Input::RHS);
const auto output_index = op.getOutputs().at(0);
- const auto &lhs = _operands.at(lhs_index);
- const auto &rhs = _operands.at(rhs_index);
- auto &output = _operands.at(output_index);
+ const auto &lhs = operands.at(lhs_index);
+ const auto &rhs = operands.at(rhs_index);
+ auto &output = operands.at(output_index);
auto new_shape = shape_inference::inferBatchMatMulShape(lhs.shape(), rhs.shape(), op.param());
output.info().shape(new_shape);
}
void StaticShapeInferer::visit(const ir::operation::BCQFullyConnected &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::BCQFullyConnected::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto cluster_idx{
op.getInputs().at(ir::operation::BCQFullyConnected::Input::WEIGHTS_CLUSTERS)};
- const auto &cluster = _operands.at(cluster_idx);
+ const auto &cluster = operands.at(cluster_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
auto cluster_buf = reinterpret_cast<const int32_t *>(cluster.data()->base());
assert(cluster_buf);
@@ -214,17 +257,19 @@ void StaticShapeInferer::visit(const ir::operation::BCQFullyConnected &op)
void StaticShapeInferer::visit(const ir::operation::BCQGather &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto indices_idx{op.getInputs().at(ir::operation::BCQGather::Input::INDICES)};
- const auto &indices = _operands.at(indices_idx);
+ const auto &indices = operands.at(indices_idx);
const auto input_binary_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_BINARY)};
- const auto &input_binary = _operands.at(input_binary_idx);
+ const auto &input_binary = operands.at(input_binary_idx);
const auto cluster_idx{op.getInputs().at(ir::operation::BCQGather::Input::INPUT_CLUSTERS)};
- const auto &cluster = _operands.at(cluster_idx);
+ const auto &cluster = operands.at(cluster_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
auto cluster_buf = reinterpret_cast<const int32_t *>(cluster.data()->base());
assert(cluster_buf);
@@ -247,16 +292,16 @@ void StaticShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
{
// get mutable output operand
+ auto &operands = _lowered_subg->graph().operands();
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
const auto shape_idx{op.getInputs().at(ir::operation::BroadcastTo::Input::SHAPE)};
- const auto &shape = _operands.at(shape_idx);
+ const auto &shape = operands.at(shape_idx);
if (!shape.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -276,16 +321,18 @@ void StaticShapeInferer::visit(const ir::operation::Comparison &op)
void StaticShapeInferer::visit(const ir::operation::Concat &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_count = op.getInputs().size();
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
shape_inference::Shapes input_shapes;
for (uint32_t i = 0; i < input_count; i++)
{
const auto input_idx{op.getInputs().at(i)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
input_shapes.emplace_back(input.shape());
}
@@ -297,12 +344,14 @@ void StaticShapeInferer::visit(const ir::operation::Concat &op)
void StaticShapeInferer::visit(const ir::operation::Conv2D &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Conv2D::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto ker_idx{op.getInputs().at(ir::operation::Conv2D::Input::KERNEL)};
- const auto &ker = _operands.at(ker_idx);
+ const auto &ker = operands.at(ker_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// re-sizing output shape
ir::Shape new_shape =
@@ -328,17 +377,18 @@ void StaticShapeInferer::visit(const ir::operation::ElementwiseUnary &op)
void StaticShapeInferer::visit(const ir::operation::ExpandDims &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto axis_idx{op.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
- const auto &axis = _operands.at(axis_idx);
+ const auto &axis = operands.at(axis_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
if (!axis.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -360,15 +410,16 @@ void StaticShapeInferer::visit(const ir::operation::ExpandDims &op)
void StaticShapeInferer::visit(const ir::operation::Fill &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto shape_idx{op.getInputs().at(ir::operation::Fill::Input::SHAPE)};
- const auto &shape = _operands.at(shape_idx);
+ const auto &shape = operands.at(shape_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
if (!shape.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -390,15 +441,17 @@ void StaticShapeInferer::visit(const ir::operation::Fill &op)
void StaticShapeInferer::visit(const ir::operation::FullyConnected &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::FullyConnected::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto ker_idx{op.getInputs().at(ir::operation::FullyConnected::Input::WEIGHT)};
- const auto &ker = _operands.at(ker_idx);
+ const auto &ker = operands.at(ker_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// re-sizing output shape
ir::Shape new_shape =
shape_inference::inferFullyConnectedShape(input.info().shape(), ker.info().shape());
@@ -412,15 +465,17 @@ void StaticShapeInferer::visit(const ir::operation::FusedBatchNorm &op)
void StaticShapeInferer::visit(const ir::operation::Gather &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Gather::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
const auto indices_idx{op.getInputs().at(ir::operation::Gather::Input::INDICES)};
- const auto &indices = _operands.at(indices_idx);
+ const auto &indices = operands.at(indices_idx);
const auto rank = input.info().shape().rank();
const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
@@ -434,70 +489,21 @@ void StaticShapeInferer::visit(const ir::operation::Gather &op)
void StaticShapeInferer::visit(const ir::operation::If &op)
{
- auto &then_graph = _lowered_subgs.at(op.param().then_subg_index)->graph();
- auto &else_graph = _lowered_subgs.at(op.param().else_subg_index)->graph();
+ // re-sizing input shapes of then/else subgraph
const std::vector<ir::OperandIndex> inputs{op.getInputs().begin() + 1, op.getInputs().end()};
- const auto &outputs = op.getOutputs();
- // re-sizing input shapes of then subgraph
- const auto &then_inputs = then_graph.getInputs();
- assert(inputs.size() == then_inputs.size());
+ std::vector<ir::OperandInfo> inputs_info;
+ const auto &graph = _lowered_subg->graph();
for (size_t i = 0; i < inputs.size(); ++i)
{
- auto &then_input = then_graph.operands().at(then_inputs.at(i));
- if (_operands.at(inputs.at(i)).info().isDynamic())
- {
- then_input.info().setDynamic();
- }
- else
- {
- auto new_shape = _operands.at(inputs.at(i)).info().shape();
- then_input.info().shape(new_shape);
- }
+ const auto &operand_info = graph.operands().at(inputs.at(i)).info();
+ inputs_info.emplace_back(operand_info);
}
+ _subg_input_observers.at(op.param().then_subg_index)->updateShapes(inputs_info);
+ _child_inferers.at(op.param().then_subg_index)->infer();
- // re-sizing input shapes of else subgraph
- const auto &else_inputs = else_graph.getInputs();
- assert(inputs.size() == else_inputs.size());
- for (size_t i = 0; i < inputs.size(); ++i)
- {
- auto &else_input = else_graph.operands().at(else_inputs.at(i));
- if (_operands.at(inputs.at(i)).info().isDynamic())
- {
- else_input.info().setDynamic();
- }
- else
- {
- const auto &new_shape = _operands.at(inputs.at(i)).info().shape();
- else_input.info().shape(new_shape);
- }
- }
-
- inferSubgraph(op.param().then_subg_index);
- inferSubgraph(op.param().else_subg_index);
-
- // re-sizing output shapes
- // TODO use then_graph / else_graph instead
- const auto &then_outputs = _lowered_subgs.at(op.param().then_subg_index)->graph().getOutputs();
- const auto &else_outputs = _lowered_subgs.at(op.param().else_subg_index)->graph().getOutputs();
- assert(outputs.size() == then_outputs.size());
- assert(outputs.size() == else_outputs.size());
- for (size_t i = 0; i < outputs.size(); ++i)
- {
- const auto &then_output = then_graph.operands().at(then_outputs.at(i));
- const auto &else_output = else_graph.operands().at(else_outputs.at(i));
- auto &output = _operands.at(outputs.at(i));
- if (!then_output.info().isDynamic() && !else_output.info().isDynamic() &&
- then_output.shape() == else_output.shape())
- {
- output.info().shape(then_output.shape());
- }
- else
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- }
- }
+ _subg_input_observers.at(op.param().else_subg_index)->updateShapes(inputs_info);
+ _child_inferers.at(op.param().else_subg_index)->infer();
}
void StaticShapeInferer::visit(const ir::operation::L2Normalization &op)
@@ -507,8 +513,10 @@ void StaticShapeInferer::visit(const ir::operation::L2Normalization &op)
void StaticShapeInferer::visit(const ir::operation::LSTM &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto output_index{op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
- auto &output = _operands.at(output_index);
+ auto &output = operands.at(output_index);
const auto output_state_out_index{
op.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
@@ -518,24 +526,24 @@ void StaticShapeInferer::visit(const ir::operation::LSTM &op)
const auto scratch_buffer_index{op.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
if (output.info().isDynamic() ||
- (_operands.exist(output_state_out_index) &&
- _operands.at(output_state_out_index).info().isDynamic()) ||
- (_operands.exist(cell_state_out_index) &&
- _operands.at(cell_state_out_index).info().isDynamic()) ||
- (_operands.exist(scratch_buffer_index) &&
- _operands.at(scratch_buffer_index).info().isDynamic()))
+ (operands.exist(output_state_out_index) &&
+ operands.at(output_state_out_index).info().isDynamic()) ||
+ (operands.exist(cell_state_out_index) &&
+ operands.at(cell_state_out_index).info().isDynamic()) ||
+ (operands.exist(scratch_buffer_index) &&
+ operands.at(scratch_buffer_index).info().isDynamic()))
return;
const auto input_index{op.getInputs().at(ir::operation::LSTM::Input::INPUT)};
- const auto &input = _operands.at(input_index);
+ const auto &input = operands.at(input_index);
const auto input_to_output_weights_index{
op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
- const auto &input_to_output_weights = _operands.at(input_to_output_weights_index);
+ const auto &input_to_output_weights = operands.at(input_to_output_weights_index);
const auto recurrent_to_output_weights_index{
op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
- const auto &recurrent_to_output_weights = _operands.at(recurrent_to_output_weights_index);
+ const auto &recurrent_to_output_weights = operands.at(recurrent_to_output_weights_index);
// re-sizing outputs
const int n_batch = (input.shape().rank() == 3 && op.param().time_major) ? input.shape().dim(1)
@@ -555,21 +563,21 @@ void StaticShapeInferer::visit(const ir::operation::LSTM &op)
output.info().shape(ir::Shape{n_batch, n_output});
}
- if (_operands.exist(output_state_out_index))
+ if (operands.exist(output_state_out_index))
{
- auto &output_state_out = _operands.at(output_state_out_index);
+ auto &output_state_out = operands.at(output_state_out_index);
output_state_out.info().shape(ir::Shape{n_batch, n_output});
}
- if (_operands.exist(cell_state_out_index))
+ if (operands.exist(cell_state_out_index))
{
- auto &cell_state_out = _operands.at(cell_state_out_index);
+ auto &cell_state_out = operands.at(cell_state_out_index);
cell_state_out.info().shape(ir::Shape{n_batch, n_cell});
}
- if (_operands.exist(scratch_buffer_index))
+ if (operands.exist(scratch_buffer_index))
{
- auto &scratch_buffer = _operands.at(scratch_buffer_index);
+ auto &scratch_buffer = operands.at(scratch_buffer_index);
const auto input_to_input_weights_index{
op.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)};
@@ -577,11 +585,11 @@ void StaticShapeInferer::visit(const ir::operation::LSTM &op)
op.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)};
bool has_input_to_input_weights =
- _operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _operands.at(input_to_input_weights_index).shape().dim(1) != 0;
+ operands.at(input_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(input_to_input_weights_index).shape().dim(1) != 0;
bool has_recurrent_to_input_weights =
- _operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
+ operands.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
+ operands.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
// NOTE The cell_to_input_weights do not exist in non-peephole although regular LSTM(non-CIFG).
// true: no CIFG
@@ -605,20 +613,21 @@ void StaticShapeInferer::visit(const ir::operation::MatrixBandPart &op)
void StaticShapeInferer::visit(const ir::operation::OneHot &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto indice_idx{op.getInputs().at(ir::operation::OneHot::Input::INDICES)};
- const auto &indice = _operands.at(indice_idx);
+ const auto &indice = operands.at(indice_idx);
const auto depth_idx{op.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
- const auto &depth = _operands.at(depth_idx);
+ const auto &depth = operands.at(depth_idx);
const auto axis = op.param().axis;
auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
if (!depth.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -631,12 +640,14 @@ void StaticShapeInferer::visit(const ir::operation::OneHot &op)
void StaticShapeInferer::visit(const ir::operation::Pack &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(0)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
const auto rank = input.shape().rank() + 1;
const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
@@ -651,21 +662,22 @@ void StaticShapeInferer::visit(const ir::operation::Pack &op)
void StaticShapeInferer::visit(const ir::operation::Pad &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Pad::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto pad_idx{op.getInputs().at(ir::operation::Pad::Input::PAD)};
- const auto &pad = _operands.at(pad_idx);
+ const auto &pad = operands.at(pad_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// if pad is not constant, output also becomes dynamic
if (!pad.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -678,10 +690,12 @@ void StaticShapeInferer::visit(const ir::operation::Pad &op)
void StaticShapeInferer::visit(const ir::operation::Permute &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(0)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// re-sizing output shape
// Permute is a special operation that layouts of input/output may be different on backend
@@ -700,16 +714,18 @@ void StaticShapeInferer::visit(const ir::operation::Pow &op)
void StaticShapeInferer::visit(const ir::operation::Range &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto start_idx{op.getInputs().at(ir::operation::Range::Input::START)};
const auto limit_idx{op.getInputs().at(ir::operation::Range::Input::LIMIT)};
const auto delta_idx{op.getInputs().at(ir::operation::Range::Input::DELTA)};
- const auto &start_op = _operands.at(start_idx);
- const auto &limit_op = _operands.at(limit_idx);
- const auto &delta_op = _operands.at(delta_idx);
+ const auto &start_op = operands.at(start_idx);
+ const auto &limit_op = operands.at(limit_idx);
+ const auto &delta_op = operands.at(delta_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
ir::Shape new_shape;
if (start_op.isConstant() && limit_op.isConstant() && delta_op.isConstant())
@@ -731,21 +747,22 @@ void StaticShapeInferer::visit(const ir::operation::Range &op)
else
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
}
}
void StaticShapeInferer::visit(const ir::operation::Reduce &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Reduce::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto axes_idx{op.getInputs().at(ir::operation::Reduce::Input::AXES)};
- const auto &axes = _operands.at(axes_idx);
+ const auto &axes = operands.at(axes_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
std::vector<int32_t> axes_vec;
for (size_t i = 0; i < axes.shape().num_elements(); ++i)
@@ -777,19 +794,21 @@ void StaticShapeInferer::visit(const ir::operation::Reduce &op)
void StaticShapeInferer::visit(const ir::operation::Reshape &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// New shape is given by second input tensor
if (op.getInputs().size() == 2)
{
// Let's check the second input
const auto shape_idx{op.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
- const auto &shape = _operands.at(shape_idx);
+ const auto &shape = operands.at(shape_idx);
if (shape.isConstant())
{
@@ -810,7 +829,6 @@ void StaticShapeInferer::visit(const ir::operation::Reshape &op)
{
// if shape is NOT Const, set output shape to be dynamic_
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
}
}
// New shape is given by option
@@ -835,21 +853,22 @@ void StaticShapeInferer::visit(const ir::operation::Reshape &op)
void StaticShapeInferer::visit(const ir::operation::ResizeBilinear &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
int32_t height_out, width_out;
if (op.getInputs().size() == 2)
{
- auto &size = _operands.at(op.getInputs().at(ir::operation::ResizeBilinear::Input::SIZE));
+ auto &size = operands.at(op.getInputs().at(ir::operation::ResizeBilinear::Input::SIZE));
if (!size.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
const auto size_v = size.asVector<std::int32_t>();
@@ -881,17 +900,19 @@ void StaticShapeInferer::visit(const ir::operation::Reverse &op)
void StaticShapeInferer::visit(const ir::operation::Select &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_cond_idx{op.getInputs().at(ir::operation::Select::Input::CONDITION)};
- const auto &input_cond = _operands.at(input_cond_idx);
+ const auto &input_cond = operands.at(input_cond_idx);
const auto input_true_idx{op.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
- const auto &input_true = _operands.at(input_true_idx);
+ const auto &input_true = operands.at(input_true_idx);
const auto input_false_idx{op.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
- const auto &input_false = _operands.at(input_false_idx);
+ const auto &input_false = operands.at(input_false_idx);
auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// Select output shpae
ir::Shape new_shape = shape_inference::inferSelectShape(
@@ -901,12 +922,14 @@ void StaticShapeInferer::visit(const ir::operation::Select &op)
void StaticShapeInferer::visit(const ir::operation::Shape &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(0)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// re-sizing output shape
ir::Shape output_shape;
@@ -917,20 +940,21 @@ void StaticShapeInferer::visit(const ir::operation::Shape &op)
void StaticShapeInferer::visit(const ir::operation::Slice &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)};
- const auto &input = _operands.at(input_index);
+ const auto &input = operands.at(input_index);
const auto begins_index{op.getInputs().at(ir::operation::Slice::Input::BEGINS)};
- const auto &begins = _operands.at(begins_index);
+ const auto &begins = operands.at(begins_index);
const auto sizes_index{op.getInputs().at(ir::operation::Slice::Input::SIZES)};
- const auto &sizes = _operands.at(sizes_index);
+ const auto &sizes = operands.at(sizes_index);
const auto output_index = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_index);
+ ir::Operand &output = operands.at(output_index);
// Whether input is constant or not does not affect whether output is dynamic or not
if (!(begins.isConstant() && sizes.isConstant()))
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -959,21 +983,22 @@ void StaticShapeInferer::visit(const ir::operation::Softmax &op)
void StaticShapeInferer::visit(const ir::operation::SpaceToBatchND &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto output_index = op.getOutputs().at(0);
const auto input_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
const auto block_shape_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto padding_idx{op.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- ir::Operand &output = _operands.at(output_index);
- const auto &input = _operands.at(input_idx);
- const auto &block_shape = _operands.at(block_shape_idx);
- const auto &padding = _operands.at(padding_idx);
+ ir::Operand &output = operands.at(output_index);
+ const auto &input = operands.at(input_idx);
+ const auto &block_shape = operands.at(block_shape_idx);
+ const auto &padding = operands.at(padding_idx);
// Whether input is constant or not does not affect whether output is dynamic or not
if (!(block_shape.isConstant() && padding.isConstant()))
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -992,21 +1017,22 @@ void StaticShapeInferer::visit(const ir::operation::SpaceToBatchND &op)
void StaticShapeInferer::visit(const ir::operation::Split &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Split::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto axis_idx{op.getInputs().at(ir::operation::Split::Input::AXIS)};
- const auto &axis = _operands.at(axis_idx);
+ const auto &axis = operands.at(axis_idx);
auto outputs = op.getOutputs();
if (!axis.isConstant())
{
for (auto output_idx : outputs)
{
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
output.info().setDynamic();
}
- _return_has_dynamic_tensor = true;
return;
}
@@ -1022,7 +1048,7 @@ void StaticShapeInferer::visit(const ir::operation::Split &op)
shape_inference::inferSplitShape(input.info().shape(), axis_value, num_splits);
for (auto output_idx : outputs)
{
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
output.info().shape(new_shape);
}
}
@@ -1035,11 +1061,13 @@ void StaticShapeInferer::visit(const ir::operation::SquaredDifference &op)
void StaticShapeInferer::visit(const ir::operation::Squeeze &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
// Squeeze output shpae
ir::Shape new_shape = shape_inference::inferSqueezeShape(input.info().shape(), op.param());
@@ -1048,21 +1076,22 @@ void StaticShapeInferer::visit(const ir::operation::Squeeze &op)
void StaticShapeInferer::visit(const ir::operation::StridedSlice &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_index{op.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
- const auto &input = _operands.at(input_index);
+ const auto &input = operands.at(input_index);
const auto starts_index{op.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
- const auto &starts = _operands.at(starts_index);
+ const auto &starts = operands.at(starts_index);
const auto ends_index{op.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
- const auto &ends = _operands.at(ends_index);
+ const auto &ends = operands.at(ends_index);
const auto strides_index{op.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- const auto &strides = _operands.at(strides_index);
+ const auto &strides = operands.at(strides_index);
const auto output_index = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_index);
+ ir::Operand &output = operands.at(output_index);
if (!(starts.isConstant() && ends.isConstant() && strides.isConstant()))
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -1085,19 +1114,20 @@ void StaticShapeInferer::visit(const ir::operation::StridedSlice &op)
void StaticShapeInferer::visit(const ir::operation::Tile &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Tile::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto multiplier_idx{op.getInputs().at(ir::operation::Tile::Input::MULTIPLES)};
- const auto &multiplier = _operands.at(multiplier_idx);
+ const auto &multiplier = operands.at(multiplier_idx);
const auto output_idx = op.getOutputs().at(0);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
if (!multiplier.isConstant())
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -1112,11 +1142,13 @@ void StaticShapeInferer::visit(const ir::operation::Tile &op)
void StaticShapeInferer::visit(const ir::operation::Transpose &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(ir::operation::Transpose::Input::INPUT)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto perm_idx{op.getInputs().at(ir::operation::Transpose::Input::PERMUTATION)};
- const auto &perm = _operands.at(perm_idx);
+ const auto &perm = operands.at(perm_idx);
// perm.shape() != ir::Shape{0} means that perm is (n-1...0)
// TODO This condition changes to perm.num_elements() == 0
@@ -1124,11 +1156,10 @@ void StaticShapeInferer::visit(const ir::operation::Transpose &op)
// get mutable output operand
const auto output_idx = op.getOutputs().at(0);
- auto &output = _operands.at(output_idx);
+ auto &output = operands.at(output_idx);
if (!perm.isConstant() && !is_regular_transpose)
{
output.info().setDynamic();
- _return_has_dynamic_tensor = true;
return;
}
@@ -1157,8 +1188,10 @@ void StaticShapeInferer::visit(const ir::operation::Transpose &op)
void StaticShapeInferer::visit(const ir::operation::Unpack &op)
{
+ auto &operands = _lowered_subg->graph().operands();
+
const auto input_idx{op.getInputs().at(0)};
- const auto &input = _operands.at(input_idx);
+ const auto &input = operands.at(input_idx);
const auto num = op.param().num;
const auto rank = input.shape().rank();
const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);
@@ -1169,10 +1202,9 @@ void StaticShapeInferer::visit(const ir::operation::Unpack &op)
for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++)
{
const auto output_idx = op.getOutputs().at(out_tensor_idx);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
output.info().setDynamic();
}
- _return_has_dynamic_tensor = true;
return;
}
@@ -1182,69 +1214,43 @@ void StaticShapeInferer::visit(const ir::operation::Unpack &op)
for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++)
{
const auto output_idx = op.getOutputs().at(out_tensor_idx);
- ir::Operand &output = _operands.at(output_idx);
+ ir::Operand &output = operands.at(output_idx);
output.info().shape(new_shape);
}
}
void StaticShapeInferer::visit(const ir::operation::While &op)
{
- auto &cond_graph = _lowered_subgs.at(op.param().cond_subg_index)->graph();
- auto &body_graph = _lowered_subgs.at(op.param().body_subg_index)->graph();
+ auto body_input_observer = _subg_input_observers.at(op.param().body_subg_index).get();
+ auto cond_input_observer = _subg_input_observers.at(op.param().cond_subg_index).get();
+ // re-sizing input shapes of body subgraph
const auto inputs = op.getInputs();
- const auto &outputs = op.getOutputs();
-
- // re-sizing input shapes of then subgraph
- const auto &cond_inputs = cond_graph.getInputs();
- assert(inputs.size() == cond_inputs.size());
+ std::vector<ir::OperandInfo> inputs_info;
+ const auto &graph = _lowered_subg->graph();
for (size_t i = 0; i < inputs.size(); ++i)
{
- const auto &input = _operands.at(inputs.at(i));
- auto &cond_input = cond_graph.operands().at(cond_inputs.at(i));
- if (input.info().isDynamic())
- {
- cond_input.info().setDynamic();
- }
- else
- {
- auto new_shape = input.info().shape();
- cond_input.info().shape(new_shape);
- }
+ const auto &operand_info = graph.operands().at(inputs.at(i)).info();
+ inputs_info.emplace_back(operand_info);
}
- // re-sizing input shapes of body subgraph
- const auto &body_inputs = body_graph.getInputs();
- assert(cond_inputs.size() == body_inputs.size());
- for (size_t i = 0; i < cond_inputs.size(); ++i)
- {
- const auto &cond_input = cond_graph.operands().at(cond_inputs.at(i));
- auto &body_input = body_graph.operands().at(body_inputs.at(i));
- if (cond_input.info().isDynamic())
- {
- body_input.info().setDynamic();
- }
- else
- {
- const auto &new_shape = cond_input.info().shape();
- body_input.info().shape(new_shape);
- }
- }
-
- // re-sizing operands of body subgraph
- inferSubgraph(op.param().body_subg_index);
+ body_input_observer->updateShapes(inputs_info);
+ _child_inferers.at(op.param().body_subg_index)->infer();
// Check whether while operation's shapes are predictable
- // If any of shape of body outputs and cond inputs are different, non-constant operands would be
- // set to dynamic
+ // This while op's outputs are also updated by the above call to
+ // "_child_inferers.at(op.param().body_subg_index)->infer()". That means the body's outputs and
+ // this op's outputs must have the same shape. So we can predict whether the body subgraph will
+ // change at every step by comparing the shapes of the inputs and outputs. If any body output
+ // shape differs from the corresponding input shape, non-constant operands are set to dynamic.
bool check_unpredictable_dynamic = false;
- const auto &body_outputs = body_graph.getOutputs();
- assert(body_outputs.size() == cond_inputs.size());
- for (size_t i = 0; i < body_outputs.size(); ++i)
+ const auto &updated_outputs = op.getOutputs();
+ assert(inputs_info.size() == updated_outputs.size());
+ for (size_t i = 0; i < updated_outputs.size(); ++i)
{
- const auto &body_output = body_graph.operands().at(body_outputs.at(i));
- auto &cond_input = cond_graph.operands().at(cond_inputs.at(i));
- if ((cond_input.info().isDynamic() != body_output.info().isDynamic()) ||
- (cond_input.shape() != body_output.shape()))
+ const auto &input_info = inputs_info.at(i);
+ const auto &output_info = graph.operands().at(updated_outputs.at(i)).info();
+ if (input_info.isDynamic() != output_info.isDynamic() ||
+ input_info.shape() != output_info.shape())
{
check_unpredictable_dynamic = true;
break;
@@ -1253,53 +1259,11 @@ void StaticShapeInferer::visit(const ir::operation::While &op)
if (check_unpredictable_dynamic)
{
- // Set inputs of body subgraph
- for (const auto &input_index : body_inputs)
- {
- auto &input = body_graph.operands().at(input_index);
- if (!input.isConstant())
- {
- input.info().setDynamic();
- }
- }
-
- // Set inputs of cond subgraph
- for (const auto &input_index : cond_inputs)
- {
- auto &input = cond_graph.operands().at(input_index);
- if (!input.isConstant())
- {
- input.info().setDynamic();
- }
- }
-
- // Set non-constant operands of body subgraph to dynamic
- inferSubgraph(op.param().body_subg_index);
- }
-
- // re-sizing operands of cond subgraph
- // If check_unpredictable_dynamic is true, non-constant operands of cond subgraph would be set to
- // dynamic
- inferSubgraph(op.param().cond_subg_index);
-
- // re-sizing outputs of while operation
- // If check_unpredictable_dynamic is true, outputs of while operation would be set to dynamic
- assert(cond_inputs.size() == outputs.size());
- for (size_t i = 0; i < cond_inputs.size(); ++i)
- {
- const auto &cond_input = cond_graph.operands().at(cond_inputs.at(i));
- auto &output = _operands.at(outputs.at(i));
- if (cond_input.info().isDynamic())
- {
- output.info().setDynamic();
- _return_has_dynamic_tensor = true;
- }
- else
- {
- const auto new_shape = cond_input.info().shape();
- output.info().shape(new_shape);
- }
+ body_input_observer->updateShapes(inputs_info, check_unpredictable_dynamic);
+ _child_inferers.at(op.param().body_subg_index)->infer();
}
+ cond_input_observer->updateShapes(inputs_info, check_unpredictable_dynamic);
+ _child_inferers.at(op.param().cond_subg_index)->infer();
}
void StaticShapeInferer::visit(const ir::operation::DetectionPostProcess &op)
@@ -1307,24 +1271,52 @@ void StaticShapeInferer::visit(const ir::operation::DetectionPostProcess &op)
// TODO: NMS supports very limited input/output size.
ir::operation::DetectionPostProcess::Param param = op.param();
+ auto &operands = _lowered_subg->graph().operands();
const int num_detected_boxes = param.max_detections * param.max_classes_per_detection;
const auto output_idx1 = op.getOutputs().at(0);
- auto &output1 = _operands.at(output_idx1);
+ auto &output1 = operands.at(output_idx1);
output1.info().shape({1, num_detected_boxes, 4});
const auto output_idx2 = op.getOutputs().at(1);
- auto &output2 = _operands.at(output_idx2);
+ auto &output2 = operands.at(output_idx2);
output2.info().shape({1, num_detected_boxes});
const auto output_idx3 = op.getOutputs().at(2);
- auto &output3 = _operands.at(output_idx3);
+ auto &output3 = operands.at(output_idx3);
output3.info().shape({1, num_detected_boxes});
const auto output_idx4 = op.getOutputs().at(3);
- auto &output4 = _operands.at(output_idx4);
+ auto &output4 = operands.at(output_idx4);
output4.info().shape({1});
}
+void StaticShapeInferer::visit(const ir::operation::Bulk &op)
+{
+ auto &operands = _lowered_subg->graph().operands();
+
+ // TODO: support multiple inputs/outputs
+ const auto input_idx{op.getInputs().at(0)};
+ const auto &input = operands.at(input_idx);
+ const auto output_idx = op.getOutputs().at(0);
+ ir::Operand &output = operands.at(output_idx);
+
+ auto cur_input_shape = input.info().shape();
+ auto origin_input_shape = op.param().origin_input_shapes[0];
+ auto cur_output_shape = output.info().shape();
+ auto origin_output_shape = op.param().origin_output_shapes[0];
+
+ // TODO: more check for valid batch request
+ assert(cur_input_shape.dim(0) >= origin_output_shape.dim(0));
+ assert(cur_input_shape.dim(0) % origin_output_shape.dim(0) == 0);
+ size_t batch_multiplier = cur_input_shape.dim(0) / origin_output_shape.dim(0);
+
+ ir::Shape new_shape;
+ new_shape.append(origin_output_shape.dim(0) * batch_multiplier);
+ for (int32_t d = 1; d < origin_output_shape.rank(); ++d)
+ new_shape.append(origin_output_shape.dim(d));
+
+ output.info().shape(new_shape);
+}
} // namespace compiler
diff --git a/runtime/onert/core/src/compiler/TensorRegistries.h b/runtime/onert/core/src/compiler/TensorRegistries.h
index 2a99db781..b3cc0bbe3 100644
--- a/runtime/onert/core/src/compiler/TensorRegistries.h
+++ b/runtime/onert/core/src/compiler/TensorRegistries.h
@@ -17,13 +17,14 @@
#ifndef __ONERT_COMPILER_TENSOR_REGISTRIES_H__
#define __ONERT_COMPILER_TENSOR_REGISTRIES_H__
-#include <unordered_set>
-#include <memory>
-#include "backend/BackendContext.h"
+#include "../backend/builtin/Config.h"
+#include "../backend/builtin/TensorRegistry.h"
+
#include "backend/Backend.h"
-#include "backend/builtin/Config.h"
-#include "backend/builtin/TensorBuilder.h"
-#include "backend/builtin/TensorRegistry.h"
+#include "backend/BackendContext.h"
+
+#include <memory>
+#include <unordered_set>
namespace onert
{
diff --git a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc
index 181f388de..c27ce3d09 100644
--- a/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc
+++ b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc
@@ -15,7 +15,6 @@
*/
#include "PermutationEliminationPass.h"
-#include "backend/builtin/Config.h"
#include "util/logging.h"
diff --git a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
index 6f9899114..71efa1bb5 100644
--- a/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
+++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc
@@ -17,18 +17,16 @@
#include "PermutationInsertionPass.h"
-#include <cassert>
-#include <utility>
-#include <unordered_map>
+#include "../../backend/builtin/Config.h"
-#include "backend/builtin/Config.h"
-#include "ir/Operand.h"
#include "compiler/OperationLowerInfo.h"
-#include "ir/Graph.h"
-#include "backend/IConfig.h"
+#include "ir/operation/Permute.h"
#include "util/logging.h"
+
+#include <cassert>
#include <memory>
-#include "ir/operation/Permute.h"
+#include <unordered_map>
+#include <utility>
namespace onert
{
@@ -125,6 +123,8 @@ ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandInde
// backend
auto &model_outputs = _graph.getOutputs();
const backend::Backend *builtin_backend = compiler::BackendManager::get().getBuiltin();
+ assert(builtin_backend->config()->id() == onert::backend::builtin::Config::ID);
+
if (model_outputs.contains(operand_index) && factor.backend() == builtin_backend)
{
model_outputs.replace(operand_index, out_operand_index);
@@ -141,6 +141,8 @@ ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandInde
const auto permute_node_layout = ir::Layout::UNKNOWN;
// NOTE If one backend supports several layout, the backend must support Permute operation
const backend::Backend *permute_node_backend = compiler::BackendManager::get().getBuiltin();
+ assert(permute_node_backend->config()->id() == onert::backend::builtin::Config::ID);
+
if (input_backend == output_backend)
{
permute_node_backend = input_backend;
diff --git a/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc
new file mode 100644
index 000000000..572b4df24
--- /dev/null
+++ b/runtime/onert/core/src/compiler/pass/UnusedOperandEliminationPass.test.cc
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "UnusedOperandEliminationPass.h"
+
+#include "ir/Graph.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::ir;
+using namespace onert::compiler::pass;
+
+TEST(UnusedOperandEliminationPass, Simple)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto in = graph.addOperand(shape, type);
+ auto out = graph.addOperand(shape, type);
+
+ auto unused = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(in);
+ graph.addOutput(out);
+
+ UnusedOperandEliminationPass{graph}.run();
+
+ ASSERT_TRUE(graph.operands().exist(in));
+ ASSERT_TRUE(graph.operands().exist(out));
+ ASSERT_FALSE(graph.operands().exist(unused));
+}
diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.cc b/runtime/onert/core/src/dumper/dot/DotDumper.cc
index 714fb6fda..0bb2fa11f 100644
--- a/runtime/onert/core/src/dumper/dot/DotDumper.cc
+++ b/runtime/onert/core/src/dumper/dot/DotDumper.cc
@@ -19,6 +19,7 @@
#include "DotDumper.h"
#include "DotBuilder.h"
+#include "ir/OperandIndexMap.h"
#include "ir/OperationIndexMap.h"
#include "backend/Backend.h"
#include "backend/IConfig.h"
@@ -31,97 +32,72 @@ namespace dumper
namespace dot
{
-void DotDumper::dump(const std::string &tag)
+namespace
{
- if (_level == Level::OFF)
- {
- return;
- }
-
- onert::dumper::dot::DotBuilder dot_builder;
-
- auto &operations = _graph.operations();
- auto &operands = _graph.operands();
-
- ir::OperationIndexMap<std::unique_ptr<Operation>> operation_nodes;
- std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>> operand_nodes;
-
- auto backend_to_fillcolor = [](const backend::Backend *backend) {
- static const auto map = []() {
- std::unordered_map<const backend::Backend *, std::string> ret;
- uint32_t index = 1; // Start from 1 to avoid 0(red) which is too dark :(
- for (const auto backend : compiler::BackendManager::get().getAll())
- {
- ret.emplace(backend, Node::BG_COLORS[index]);
- index = (index + 1) % (sizeof(Node::BG_COLORS) / sizeof(Node::BG_COLORS[0]));
- }
- return ret;
- }();
-
- auto itr = map.find(backend);
- if (itr == map.end())
- {
- return Node::DEFAULT_FILLCOLOR;
- }
- else
+std::string backend_to_fillcolor(const backend::Backend *backend)
+{
+ static const auto map = []() {
+ std::unordered_map<const backend::Backend *, std::string> ret;
+ uint32_t index = 1; // Start from 1 to avoid 0(red) which is too dark :(
+ for (const auto backend : compiler::BackendManager::get().getAll())
{
- return itr->second;
+ ret.emplace(backend, Node::BG_COLORS[index]);
+ index = (index + 1) % (sizeof(Node::BG_COLORS) / sizeof(Node::BG_COLORS[0]));
}
- };
+ return ret;
+ }();
+ auto itr = map.find(backend);
+ if (itr == map.end())
+ {
+ return Node::DEFAULT_FILLCOLOR;
+ }
+ else
+ {
+ return itr->second;
+ }
+}
- util::Set<ir::OperandIndex> shown_operand_set;
+std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>>
+generate_dot_operands(const ir::Graph &graph, const DotDumper::Level level)
+{
+ std::unordered_map<ir::OperandIndex, std::unique_ptr<Operand>> dot_operands;
+ const auto &operands = graph.operands();
operands.iterate([&](const ir::OperandIndex &index, const ir::Operand &object) {
- bool showing_cond = false;
- if (_level == Level::ALL)
- {
- showing_cond = true;
- }
- else
- {
- showing_cond =
- !object.isConstant() || (_graph.getInputs() + _graph.getOutputs()).contains(index);
- }
+ bool showing_cond =
+ level == DotDumper::Level::ALL
+ ? true
+ : !object.isConstant() || (graph.getInputs() + graph.getOutputs()).contains(index);
if (showing_cond)
{
- shown_operand_set.add(index);
-
auto type = [&]() {
using onert::dumper::dot::Operand;
- if (_graph.getInputs().contains(index))
+ if (graph.getInputs().contains(index))
return Operand::Type::MODEL_INPUT;
- if (_graph.getOutputs().contains(index))
+ if (graph.getOutputs().contains(index))
return Operand::Type::MODEL_OUTPUT;
return Operand::Type::INTERNAL;
}();
auto node = std::make_unique<Operand>(index, type);
+ std::string label = std::to_string(index.value());
+ std::string fillcolor = "";
+ node->setAttribute("label", label);
+ node->setAttribute("fillcolor", fillcolor);
- {
- // Display LowerInfo attributes
- std::string label = std::to_string(index.value());
- std::string fillcolor = "";
- if (_lowered_graph)
- {
- auto lower_info = _lowered_graph->lower_info().operand.getRawPtr(index);
- const auto &def_factors = lower_info->def_factors();
- if (def_factors.size() > 0)
- {
- label += "\\n[";
- label += def_factors.getOnlyElement().backend()->config()->id();
- label += "]";
-
- fillcolor = backend_to_fillcolor(lower_info->def_factors().getOnlyElement().backend());
- }
- }
- node->setAttribute("label", label);
- node->setAttribute("fillcolor", fillcolor);
- }
-
- operand_nodes.emplace(index, std::move(node));
+ dot_operands.emplace(index, std::move(node));
}
});
+ return dot_operands;
+}
+
+ir::OperationIndexMap<std::unique_ptr<Operation>>
+generate_dot_operations(const ir::Graph &graph,
+ const ir::OperandIndexMap<std::unique_ptr<Operand>> &dot_operands)
+{
+ ir::OperationIndexMap<std::unique_ptr<Operation>> dot_operations;
+ const auto &operations = graph.operations();
operations.iterate([&](const ir::OperationIndex &index, const ir::Operation &op) {
auto node = std::make_unique<Operation>(index, op);
@@ -130,42 +106,79 @@ void DotDumper::dump(const std::string &tag)
using onert::dumper::dot::Operand;
// Constant input and dump level is ALL_BUT_CONSTANTS
- if (operand_nodes.find(input) == operand_nodes.end())
+ if (dot_operands.find(input) == dot_operands.end())
continue;
- auto &input_node = operand_nodes.at(input);
+ auto &input_node = dot_operands.at(input);
input_node->addOutEdge(node.get());
}
for (auto output : op.getOutputs() | ir::Remove::UNDEFINED)
{
using onert::dumper::dot::Operand;
- auto &output_node = operand_nodes.at(output);
+ auto &output_node = dot_operands.at(output);
node->addOutEdge(output_node.get());
}
- operation_nodes.emplace(index, std::move(node));
+ dot_operations.emplace(index, std::move(node));
});
- if (_lowered_graph)
- {
- _graph.operations().iterate([&](const ir::OperationIndex &index, const ir::Operation &) {
- const auto lower_info = _lowered_graph->lower_info().operation.getRawPtr(index);
- if (lower_info)
+ return dot_operations;
+}
+
+void update_lower_info(const compiler::LoweredGraph &lowered_graph,
+ ir::OperandIndexMap<std::unique_ptr<Operand>> *dot_operands)
+{
+ const auto &operands = lowered_graph.graph().operands();
+ operands.iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
+ auto itr = dot_operands->find(index);
+ if (itr != dot_operands->end())
+ {
+ auto &node = itr->second;
+ // Display LowerInfo attributes
+ std::string label = node->getAttribute("label");
+ std::string fillcolor = node->getAttribute("fillcolor");
+ auto lower_info = lowered_graph.lower_info().operand.getRawPtr(index);
+ const auto &def_factors = lower_info->def_factors();
+ if (def_factors.size() > 0)
{
- auto fillcolor = backend_to_fillcolor(lower_info->backend());
- std::string backend_label = "[" + lower_info->backend()->config()->id() + "]";
- auto itr = operation_nodes.find(index);
- if (itr != operation_nodes.end())
- {
- auto &node = itr->second;
- node->setAttribute("label", node->getAttribute("label") + "\n" + backend_label);
- node->setAttribute("fillcolor", fillcolor);
- }
+ label += "\\n[";
+ label += def_factors.getOnlyElement().backend()->config()->id();
+ label += "]";
+ fillcolor = backend_to_fillcolor(lower_info->def_factors().getOnlyElement().backend());
}
- });
- }
+ node->setAttribute("label", label);
+ node->setAttribute("fillcolor", fillcolor);
+ }
+ });
+}
+void update_lower_info(const compiler::LoweredGraph &lowered_graph,
+ ir::OperationIndexMap<std::unique_ptr<Operation>> *dot_operations)
+{
+ const auto &operations = lowered_graph.graph().operations();
+ operations.iterate([&](const ir::OperationIndex &index, const ir::Operation &) {
+ const auto lower_info = lowered_graph.lower_info().operation.getRawPtr(index);
+ if (lower_info)
+ {
+ auto fillcolor = backend_to_fillcolor(lower_info->backend());
+ std::string backend_label = "[" + lower_info->backend()->config()->id() + "]";
+ auto itr = dot_operations->find(index);
+ if (itr != dot_operations->end())
+ {
+ auto &node = itr->second;
+ node->setAttribute("label", node->getAttribute("label") + "\n" + backend_label);
+ node->setAttribute("fillcolor", fillcolor);
+ }
+ }
+ });
+}
+
+void dump_to_file(const ir::OperandIndexMap<std::unique_ptr<Operand>> &operand_nodes,
+ const ir::OperationIndexMap<std::unique_ptr<Operation>> &operation_nodes,
+ const std::string &tag)
+{
+ onert::dumper::dot::DotBuilder dot_builder;
for (const auto &e : operation_nodes)
dot_builder.update(*e.second);
for (const auto &e : operand_nodes)
@@ -186,6 +199,33 @@ void DotDumper::dump(const std::string &tag)
fb.close();
}
}
+} // namespace
+
+void DotDumper::dump(const ir::Graph &graph, const std::string &tag)
+{
+ if (_level == Level::OFF)
+ {
+ return;
+ }
+
+ const auto dot_operands = generate_dot_operands(graph, _level);
+ const auto dot_operations = generate_dot_operations(graph, dot_operands);
+ dump_to_file(dot_operands, dot_operations, tag);
+}
+
+void DotDumper::dump(const compiler::LoweredGraph &lowered_graph, const std::string &tag)
+{
+ if (_level == Level::OFF)
+ {
+ return;
+ }
+
+ auto dot_operands = generate_dot_operands(lowered_graph.graph(), _level);
+ auto dot_operations = generate_dot_operations(lowered_graph.graph(), dot_operands);
+ update_lower_info(lowered_graph, &dot_operands);
+ update_lower_info(lowered_graph, &dot_operations);
+ dump_to_file(dot_operands, dot_operations, tag);
+}
} // namespace dot
} // namespace dumper
diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.h b/runtime/onert/core/src/dumper/dot/DotDumper.h
index f300c3432..6249010d3 100644
--- a/runtime/onert/core/src/dumper/dot/DotDumper.h
+++ b/runtime/onert/core/src/dumper/dot/DotDumper.h
@@ -38,27 +38,28 @@ public:
};
public:
- DotDumper(const ir::Graph &graph, Level level)
- : _lowered_graph{nullptr}, _graph(graph), _level{level}
- {
- }
- DotDumper(const compiler::LoweredGraph *lowered_graph, Level level)
- : _lowered_graph{lowered_graph}, _graph(_lowered_graph->graph()), _level{level}
- {
- }
+ DotDumper(Level level) : _level{level} {}
public:
/**
- * @brief Dump to dot file as tag name if "GRAPH_DOT_DUMP" is set
+ * @brief Dump graph information to dot file as tag name if "GRAPH_DOT_DUMP" is set
+ *
+ * @param[in] graph The graph that would be used to get operations and operands
+ * @param[in] tag The name of dot file that would be created
+ * @return N/A
+ */
+ void dump(const ir::Graph &graph, const std::string &tag);
+
+ /**
+ * @brief Dump lowered graph information to dot file as tag name if "GRAPH_DOT_DUMP" is set
*
+ * @param[in] lowered_graph The lowered graph that would be used to get operations and operands
* @param[in] tag The name of dot file that would be created
* @return N/A
*/
- void dump(const std::string &tag);
+ void dump(const compiler::LoweredGraph &lowered_graph, const std::string &tag);
private:
- const compiler::LoweredGraph *_lowered_graph;
- const ir::Graph &_graph;
Level _level;
};
diff --git a/runtime/onert/core/src/exec/DataflowExecutor.h b/runtime/onert/core/src/exec/DataflowExecutor.h
index bcac19d2e..1649be733 100644
--- a/runtime/onert/core/src/exec/DataflowExecutor.h
+++ b/runtime/onert/core/src/exec/DataflowExecutor.h
@@ -17,19 +17,18 @@
#ifndef __ONERT_EXEC_DATAFLOW_EXECUTOR_H__
#define __ONERT_EXEC_DATAFLOW_EXECUTOR_H__
-#include <list>
-#include <map>
-#include <unordered_map>
-
-#include "exec/FunctionSequence.h"
+#include "ExecutorBase.h"
#include "Job.h"
-#include "ir/OperandIndexSequence.h"
-#include "ir/Index.h"
-#include <memory>
-#include "exec/ExecutorBase.h"
+
#include "compiler/CodeMap.h"
+#include "ir/OperandIndexSequence.h"
#include "util/TracingCtx.h"
+#include <list>
+#include <map>
+#include <memory>
+#include <unordered_map>
+
namespace onert
{
namespace exec
diff --git a/runtime/onert/core/src/exec/ExecTime.cc b/runtime/onert/core/src/exec/ExecTime.cc
index 6bf2744a9..4b82655b9 100644
--- a/runtime/onert/core/src/exec/ExecTime.cc
+++ b/runtime/onert/core/src/exec/ExecTime.cc
@@ -14,12 +14,10 @@
* limitations under the License.
*/
-#include "exec/ExecTime.h"
+#include "ExecTime.h"
-#include <fstream>
-#include <cassert>
-#include <limits>
#include <algorithm>
+#include <cassert>
namespace onert
{
diff --git a/runtime/onert/core/src/exec/ExecTime.test.cc b/runtime/onert/core/src/exec/ExecTime.test.cc
new file mode 100644
index 000000000..1f7152e7b
--- /dev/null
+++ b/runtime/onert/core/src/exec/ExecTime.test.cc
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ExecTime.h"
+
+#include "backend/IConfig.h"
+#include "backend/Backend.h"
+
+#include <gtest/gtest.h>
+
+#include <string>
+
+namespace
+{
+using namespace onert;
+using namespace exec;
+using namespace backend;
+
+struct MockConfig : public IConfig
+{
+ std::string id() override { return "b1"; }
+ bool initialize() override { return true; };
+ bool supportPermutation() override { return false; }
+ ir::Layout supportLayout(const ir::Operation &, ir::Layout) override
+ {
+ return ir::Layout::UNKNOWN;
+ }
+ bool supportDynamicTensor() override { return false; }
+ bool supportFP16() override { return false; }
+};
+
+struct MockBackend : public ::onert::backend::Backend
+{
+ std::shared_ptr<onert::backend::IConfig> config() const override
+ {
+ return std::make_shared<MockConfig>();
+ }
+ std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&) const override
+ {
+ return nullptr;
+ }
+};
+
+TEST(ExecTime, roundtrip_ok)
+{
+ const auto *b = new MockBackend();
+ std::vector<const Backend *> bs = {b};
+ {
+ ExecTime et(bs);
+ et.updateOperationExecTime(b, "op1", true, 100, 100);
+ et.updateOperationExecTime(b, "op1", true, 200, 200);
+ et.updateOperationExecTime(b, "op1", false, 100, 888);
+ et.storeOperationsExecTime();
+ }
+ {
+ ExecTime et(bs);
+ auto time = et.getOperationExecTime(b, "op1", true, 100);
+ ASSERT_EQ(time, 100);
+ // Check interpolation
+ time = et.getOperationExecTime(b, "op1", true, 150);
+ ASSERT_EQ(time, 150);
+ time = et.getOperationExecTime(b, "op1", false, 100);
+ ASSERT_EQ(time, 888);
+ et.storeOperationsExecTime();
+ }
+ // clean up
+ EXPECT_EQ(remove("exec_time.json"), 0);
+}
+
+TEST(ExecTime, structure)
+{
+
+ const auto *b = new MockBackend();
+ std::vector<const Backend *> bs = {b};
+ {
+ ExecTime et(bs);
+ et.updateOperationExecTime(b, "op1", true, 100, 100);
+ et.updateOperationExecTime(b, "op1", true, 200, 200);
+ et.storeOperationsExecTime();
+ }
+ {
+ ExecTime et(bs);
+ auto time = et.getOperationExecTime(b, "op1", true, 100);
+ ASSERT_EQ(time, 100);
+ // Check interpolation
+ time = et.getOperationExecTime(b, "op1", true, 200);
+ ASSERT_EQ(time, 200);
+ et.storeOperationsExecTime();
+ }
+ // clean up
+ EXPECT_EQ(remove("exec_time.json"), 0);
+}
+} // unnamed namespace
diff --git a/runtime/onert/core/src/exec/Execution.cc b/runtime/onert/core/src/exec/Execution.cc
index 8eff73bac..9d1e06d6c 100644
--- a/runtime/onert/core/src/exec/Execution.cc
+++ b/runtime/onert/core/src/exec/Execution.cc
@@ -23,13 +23,12 @@ namespace onert
namespace exec
{
-Execution::Execution(const std::shared_ptr<ExecutorMap> &executors) : _executors{executors}
+Execution::Execution(const std::shared_ptr<Executors> &executors) : _executors{executors}
{
assert(executors != nullptr);
assert(executors->at(ir::SubgraphIndex{0}) != nullptr);
- const auto &primary_subg = primary_subgraph();
- _io_desc.inputs.resize(primary_subg.getInputs().size());
- _io_desc.outputs.resize(primary_subg.getOutputs().size());
+ _io_desc.inputs.resize(_executors->inputSize());
+ _io_desc.outputs.resize(_executors->outputSize());
sem_init(&_async_io_descs_sem, 0, 1);
}
@@ -48,8 +47,7 @@ void Execution::changeInputShape(const ir::IOIndex &index, const ir::Shape &new_
void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t length,
ir::Layout layout)
{
- const auto input_index = primary_subgraph().getInputs().at(index);
- const auto info = primary_subgraph().operands().at(input_index).info();
+ const auto info = _executors->inputInfo(index);
// TODO handle when (!buffer && length != 0) : setting the input as an optional tensor
@@ -105,8 +103,7 @@ bool Execution::isEmptyQueue()
void Execution::executeAsyncInput(const ir::IOIndex &index, const void *buffer, size_t length,
ir::Layout layout)
{
- const auto input_index = primary_subgraph().getInputs().at(index);
- const auto info = primary_subgraph().operands().at(input_index).info();
+ const auto info = _executors->inputInfo(index);
IODescription *_async_io_desc = _async_io_descs.back().first;
{
@@ -135,8 +132,7 @@ void Execution::executeAsyncInput(const ir::IOIndex &index, const void *buffer,
void Execution::executeAsyncOutput(const ir::IOIndex &index, void *buffer, size_t length,
ir::Layout layout)
{
- const auto output_index = primary_subgraph().getOutputs().at(index);
- const auto info = primary_subgraph().operands().at(output_index).info();
+ const auto info = _executors->outputInfo(index);
IODescription *_async_io_desc = _async_io_descs.front().first;
if (length < info.total_size())
@@ -165,8 +161,7 @@ void Execution::setInput(const ir::IOIndex &index, const ir::TypeInfo &type, con
// TODO Remove default parameter
void Execution::setOutput(const ir::IOIndex &index, void *buffer, size_t length, ir::Layout layout)
{
- const auto output_index = primary_subgraph().getOutputs().at(index);
- const auto info = primary_subgraph().operands().at(output_index).info();
+ const auto info = _executors->outputInfo(index);
if (length < info.total_size())
{
@@ -208,7 +203,7 @@ void Execution::execute()
{
VERBOSE(Execution) << "Start execution" << std::endl;
- primary_executor()->execute(_io_desc);
+ _executors->execute(_io_desc);
finished = true;
VERBOSE(Execution) << "Execution finished" << std::endl;
@@ -248,8 +243,7 @@ ir::Shape Execution::getInputShape(ir::IOIndex ind) const
auto itr = _io_desc.dynamic_input_shapes.find(ind);
if (itr == _io_desc.dynamic_input_shapes.end())
{
- auto operand_idx = primary_subgraph().getInputs().at(ind);
- return primary_subgraph().operands().at(operand_idx).shape();
+ return _executors->inputInfo(ind).shape();
}
else
{
diff --git a/runtime/onert/core/src/exec/Execution.test.cc b/runtime/onert/core/src/exec/Execution.test.cc
new file mode 100644
index 000000000..e3ea49470
--- /dev/null
+++ b/runtime/onert/core/src/exec/Execution.test.cc
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exec/Execution.h"
+
+#include "compiler/Compiler.h"
+#include "ir/Graph.h"
+#include "ir/operation/BinaryArithmetic.h"
+#include "util/TracingCtx.h"
+
+#include <gtest/gtest.h>
+#include <thread>
+
+namespace
+{
+
+using namespace onert::ir;
+
+class CompiledMockUpModel
+{
+public:
+ CompiledMockUpModel()
+ {
+ // Model: two elementwise add operations
+ // model input: lhs, rhs1
+ // model output: second add result (result2)
+ // constant: rhs2
+ // result1 <= (lhs + rhs1)
+ // result2 <= (result1 + rhs2)
+ // lhs, rhs1, rhs2, result1, result2 shape: {1, 2, 2, 1}
+ // activation: none (constant)
+ graph = std::make_shared<Graph>();
+ // 1st add operands (result1 <= lhs + rhs1)
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ static float rhs2_data[4] = {3, 1, -1, 5};
+ auto operand_lhs = graph->addOperand(shape, type);
+ auto operand_rhs1 = graph->addOperand(shape, type);
+ auto operand_result1 = graph->addOperand(shape, type);
+ auto operand_rhs2 = graph->addOperand(shape, type);
+ auto operand_result2 = graph->addOperand(shape, type);
+ graph->operands()
+ .at(operand_rhs2)
+ .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
+ // 2nd add operations (result2 <= result1 + rhs2)
+ operation::BinaryArithmetic::Param param1;
+ param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param1.activation = Activation::NONE;
+ auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
+ auto output_set1 = OperandIndexSequence{operand_result1};
+ graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
+ operation::BinaryArithmetic::Param param2;
+ param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param2.activation = Activation::NONE;
+ auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
+ auto output_set2 = OperandIndexSequence{operand_result2};
+ graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
+ // Identify model inputs and outputs
+ graph->addInput(operand_lhs);
+ graph->addInput(operand_rhs1);
+ graph->addOutput(operand_result2);
+ graph->verify();
+
+ // Compile
+ auto model = std::make_shared<onert::ir::Model>();
+ model->push(onert::ir::SubgraphIndex{0}, graph);
+ coptions = onert::compiler::CompilerOptions::fromGlobalConfig();
+ onert::compiler::Compiler compiler{model, *coptions};
+ artifact = compiler.compile();
+ }
+
+public:
+ std::shared_ptr<Graph> graph;
+ std::unique_ptr<onert::compiler::CompilerOptions> coptions;
+ std::shared_ptr<onert::compiler::CompilerArtifact> artifact;
+};
+
+TEST(ExecInstance, simple)
+{
+ auto mockup = CompiledMockUpModel();
+ auto graph = mockup.graph;
+ auto executors = mockup.artifact->_executors;
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float input1_buffer[4] = {1, 0, -1, -2};
+ const float input2_buffer[4] = {1, -3, 2, -4};
+ float output_buffer[4] = {};
+ const float output_expected[4] = {5, -2, 0, -1};
+
+ onert::exec::Execution execution{executors};
+
+ execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
+ execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
+ execution.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(output_buffer[i], output_expected[i]);
+ }
+}
+
+TEST(ExecInstance, twoCompile)
+{
+ auto mockup = CompiledMockUpModel();
+ auto graph = mockup.graph;
+ auto executors1 = mockup.artifact->_executors;
+ onert::exec::Execution execution1{executors1};
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float exe1_input1_buffer[4] = {1, 0, -1, -2};
+ const float exe1_input2_buffer[4] = {1, -3, 2, -4};
+ float exe1_output_buffer[4] = {};
+ const float exe1_output_expected[4] = {5, -2, 0, -1};
+
+ execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
+ execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
+ execution1.setOutput(output, reinterpret_cast<void *>(exe1_output_buffer), 16);
+
+ // Make new executor: compile again
+ auto model = std::make_shared<onert::ir::Model>();
+ model->push(onert::ir::SubgraphIndex{0}, graph);
+ auto coptions = onert::compiler::CompilerOptions::fromGlobalConfig();
+ onert::compiler::Compiler compiler{model, *coptions};
+ std::shared_ptr<onert::compiler::CompilerArtifact> artifact = compiler.compile();
+ onert::exec::Execution execution2{artifact->_executors};
+
+ const float exe2_input1_buffer[4] = {2, 1, -2, 0};
+ const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
+ float exe2_output_buffer[4] = {};
+ const float exe2_output_expected[4] = {2, 5, -2, 7};
+
+ execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
+ execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
+ execution2.setOutput(output, reinterpret_cast<void *>(exe2_output_buffer), 16);
+
+ execution1.execute();
+ execution2.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
+ EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
+ }
+}
+
+// Support two initialized execution instances, then ordered execution
+TEST(ExecInstance, twoExecution)
+{
+ auto mockup = CompiledMockUpModel();
+ auto executors = mockup.artifact->_executors;
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output1 = IOIndex{0};
+
+ const float exe1_input1_buffer[4] = {1, 0, -1, -2};
+ const float exe1_input2_buffer[4] = {1, -3, 2, -4};
+ float exe1_output_buffer[4] = {};
+ const float exe1_output_expected[4] = {5, -2, 0, -1};
+ const float exe2_output_expected[4] = {2, 5, -2, 7};
+
+ onert::exec::Execution execution1{executors};
+ execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
+ execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
+ execution1.setOutput(output1, reinterpret_cast<void *>(exe1_output_buffer), 16);
+
+ const float exe2_input1_buffer[4] = {2, 1, -2, 0};
+ const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
+ float exe2_output_buffer[4] = {};
+
+ // Make new execution
+ onert::exec::Execution execution2{executors};
+ execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
+ execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
+ execution2.setOutput(output1, reinterpret_cast<void *>(exe2_output_buffer), 16);
+
+ execution1.execute();
+ execution2.execute();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
+ EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
+ }
+}
+
+class Inference
+{
+public:
+ Inference(const float (&input1)[4], const float (&input2)[4], float (&output)[4],
+ std::shared_ptr<onert::exec::Executors> &executors)
+ : _input1{input1}, _input2{input2}, _output{output}, _executors{executors}
+ {
+ // DO NOTHING
+ }
+
+ void inference(void)
+ {
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output1 = IOIndex{0};
+
+ onert::exec::Execution execution{_executors};
+ execution.setInput(input1, reinterpret_cast<const void *>(_input1), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(_input2), 16);
+ execution.setOutput(output1, reinterpret_cast<void *>(_output), 16);
+
+ execution.execute();
+ }
+
+private:
+ const float (&_input1)[4];
+ const float (&_input2)[4];
+ float (&_output)[4];
+ std::shared_ptr<onert::exec::Executors> &_executors;
+};
+
+// Support multi-thread execution
+TEST(ExecInstance, twoThreads)
+{
+ auto mockup = CompiledMockUpModel();
+ auto executors = mockup.artifact->_executors;
+
+ const float exe1_input1_buffer[4] = {1, 0, -1, -2};
+ const float exe1_input2_buffer[4] = {1, -3, 2, -4};
+ float exe1_output_buffer[4] = {};
+ const float exe1_output_expected[4] = {5, -2, 0, -1};
+
+ Inference execution1{exe1_input1_buffer, exe1_input2_buffer, exe1_output_buffer, executors};
+
+ const float exe2_input1_buffer[4] = {2, 1, -2, 0};
+ const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
+ float exe2_output_buffer[4] = {};
+ const float exe2_output_expected[4] = {2, 5, -2, 7};
+
+ Inference execution2{exe2_input1_buffer, exe2_input2_buffer, exe2_output_buffer, executors};
+
+ std::thread t1{&Inference::inference, &execution1};
+ std::thread t2{&Inference::inference, &execution2};
+
+ t1.join();
+ t2.join();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
+ EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
+ }
+}
+
+// Support asynchronous execution
+TEST(ExecInstance, async)
+{
+ auto mockup = CompiledMockUpModel();
+ auto graph = mockup.graph;
+ auto executors = mockup.artifact->_executors;
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto output = IOIndex{0};
+
+ const float input1_buffer[4] = {1, 0, -1, -2};
+ const float input2_buffer[4] = {1, -3, 2, -4};
+ float output_buffer[4] = {};
+ const float output_expected[4] = {5, -2, 0, -1};
+
+ onert::exec::Execution execution{executors};
+
+ execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
+ execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
+ execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
+ execution.startExecute();
+ execution.waitFinish();
+
+ for (auto i = 0; i < 4; i++)
+ {
+ EXPECT_EQ(output_buffer[i], output_expected[i]);
+ }
+}
+
+} // namespace
diff --git a/runtime/onert/core/src/exec/ExecutionObservee.h b/runtime/onert/core/src/exec/ExecutionObservee.h
index 423b5026b..3ee1754c9 100644
--- a/runtime/onert/core/src/exec/ExecutionObservee.h
+++ b/runtime/onert/core/src/exec/ExecutionObservee.h
@@ -17,11 +17,12 @@
#ifndef __ONERT_EXEC_EXECUTION_OBSERVEE_H__
#define __ONERT_EXEC_EXECUTION_OBSERVEE_H__
-#include <list>
+#include "ExecutionObservers.h"
-#include "exec/ExecutionObservers.h"
#include "ir/Index.h"
+#include <list>
+
namespace onert
{
namespace exec
diff --git a/runtime/onert/core/src/exec/ExecutionObservers.cc b/runtime/onert/core/src/exec/ExecutionObservers.cc
index 386178ae6..9abde7ba4 100644
--- a/runtime/onert/core/src/exec/ExecutionObservers.cc
+++ b/runtime/onert/core/src/exec/ExecutionObservers.cc
@@ -14,16 +14,16 @@
* limitations under the License.
*/
-#include "exec/ExecutionObservers.h"
+#include "ExecutionObservers.h"
-#include <string>
-#include <sstream>
+#include "../util/EventWriter.h"
#include "util/logging.h"
-#include "exec/IExecutor.h"
-#include "misc/polymorphic_downcast.h"
-#include "ir/Operation.h"
-#include "util/EventWriter.h"
+
+#include <misc/polymorphic_downcast.h>
+
+#include <string>
+#include <sstream>
namespace
{
diff --git a/runtime/onert/core/src/exec/ExecutionObservers.h b/runtime/onert/core/src/exec/ExecutionObservers.h
index 4c6c7b18e..1aadac2f5 100644
--- a/runtime/onert/core/src/exec/ExecutionObservers.h
+++ b/runtime/onert/core/src/exec/ExecutionObservers.h
@@ -17,17 +17,16 @@
#ifndef __ONERT_EXEC_OBSREVERS_H__
#define __ONERT_EXEC_OBSREVERS_H__
-#include "exec/IFunction.h"
+#include "ExecTime.h"
+#include "../util/EventCollector.h"
+#include "../util/EventRecorder.h"
+#include "../util/EventWriter.h"
+
+#include "exec/Executors.h"
#include "ir/Index.h"
#include "ir/Operation.h"
-#include "ExecTime.h"
#include "util/ITimer.h"
-#include "exec/IExecutor.h"
-#include "util/EventCollector.h"
-#include "util/EventRecorder.h"
-#include "util/EventWriter.h"
#include "util/TracingCtx.h"
-#include "util/EventWriter.h"
namespace onert
{
diff --git a/runtime/onert/core/src/exec/ExecutorBase.cc b/runtime/onert/core/src/exec/ExecutorBase.cc
index efc22cfa5..d2d204a0b 100644
--- a/runtime/onert/core/src/exec/ExecutorBase.cc
+++ b/runtime/onert/core/src/exec/ExecutorBase.cc
@@ -15,11 +15,10 @@
*/
#include "ExecutorBase.h"
+
#include "ShapeConverter.h"
-#include "backend/builtin/UserTensor.h"
-#include "util/logging.h"
-#include "misc/polymorphic_downcast.h"
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/exec/ExecutorBase.h b/runtime/onert/core/src/exec/ExecutorBase.h
index c0f609d11..e4f914546 100644
--- a/runtime/onert/core/src/exec/ExecutorBase.h
+++ b/runtime/onert/core/src/exec/ExecutorBase.h
@@ -17,22 +17,17 @@
#ifndef __ONERT_EXEC_EXECUTOR_BASE_H__
#define __ONERT_EXEC_EXECUTOR_BASE_H__
-#include "IPermuteFunction.h"
+#include "ExecutionObservee.h"
+#include "../backend/builtin/IOTensor.h"
+#include "../compiler/TensorRegistries.h"
+
+#include "compiler/LoweredGraph.h"
#include "exec/IExecutor.h"
-#include "exec/ExecTime.h"
-#include "exec/ExecutionObservee.h"
-#include "exec/IFunction.h"
#include "exec/IODescription.h"
#include "ir/Graph.h"
-#include "ir/Index.h"
-#include "compiler/GraphLowerInfo.h"
#include "ir/OperationIndexMap.h"
-#include "compiler/LoweredGraph.h"
-#include "compiler/TensorRegistries.h"
-#include "backend/builtin/IOTensor.h"
#include "util/TracingCtx.h"
-#include <cstdint>
#include <memory>
#include <mutex>
#include <vector>
diff --git a/runtime/onert/core/src/exec/Executors.cc b/runtime/onert/core/src/exec/Executors.cc
new file mode 100644
index 000000000..e0ee24fea
--- /dev/null
+++ b/runtime/onert/core/src/exec/Executors.cc
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exec/Executors.h"
+
+namespace onert
+{
+namespace exec
+{
+
+uint32_t Executors::inputSize() const
+{
+ return _model_edges ? _model_edges->pkg_inputs.size()
+ : _executors.at(ir::SubgraphIndex{0})->graph().getInputs().size();
+}
+
+uint32_t Executors::outputSize() const
+{
+ return _model_edges ? _model_edges->pkg_outputs.size()
+ : _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().size();
+}
+
+const ir::OperandInfo Executors::inputInfo(const ir::IOIndex &index)
+{
+ if (_model_edges)
+ {
+ // Assume that each model may have only one subgraph
+ // TODO handle general case
+ const auto desc = _model_edges->pkg_inputs[index.value()];
+ const auto model_idx = std::get<0>(desc);
+ const auto executor_idx = ir::SubgraphIndex{model_idx.value()};
+ const auto input_index = _executors.at(executor_idx)->graph().getInputs().at(std::get<2>(desc));
+ return _executors.at(executor_idx)->graph().operands().at(input_index).info();
+ }
+
+ const auto input_index = _executors.at(ir::SubgraphIndex{0})->graph().getInputs().at(index);
+ return _executors.at(ir::SubgraphIndex{0})->graph().operands().at(input_index).info();
+}
+
+const ir::OperandInfo Executors::outputInfo(const ir::IOIndex &index)
+{
+ if (_model_edges)
+ {
+ // Assume that each model may have only one subgraph
+ // TODO handle general case
+ auto desc = _model_edges->pkg_outputs[index.value()];
+ auto model_idx = std::get<0>(desc);
+ auto executor_idx = ir::SubgraphIndex{model_idx.value()};
+ auto output_index = _executors.at(executor_idx)->graph().getOutputs().at(std::get<2>(desc));
+ return _executors.at(executor_idx)->graph().operands().at(output_index).info();
+ }
+
+ auto output_index = _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().at(index);
+ return _executors.at(ir::SubgraphIndex{0})->graph().operands().at(output_index).info();
+}
+
+void Executors::execute(const IODescription &desc)
+{
+ if (_model_edges)
+ return executeEntries(desc);
+
+ _executors.at(ir::SubgraphIndex{0})->execute(desc);
+}
+
+void Executors::executeEntries(const IODescription &desc)
+{
+ // Assume 2 executors only
+ // Assume that each model may have only one subgraph
+ // TODO Support general case
+ if (_executors.size() != 2)
+ throw std::runtime_error{"NYI: Multi model execution for this package is not supported yet"};
+
+ // Assume all edges are 0:0:x -> 1:0:x
+ for (auto edge : _model_edges->edges)
+ {
+ if ((std::get<ir::ModelIndex>(edge.from) != ir::ModelIndex{0}) ||
+ (std::get<ir::ModelIndex>(edge.to) != ir::ModelIndex{1}) ||
+ (std::get<ir::SubgraphIndex>(edge.from) != ir::SubgraphIndex{0}) ||
+ (std::get<ir::SubgraphIndex>(edge.to) != ir::SubgraphIndex{0}) ||
+ (std::get<ir::IOIndex>(edge.from) != std::get<ir::IOIndex>(edge.to)))
+ throw std::runtime_error{"NYI: Multi model execution for this edge is not supported yet"};
+ }
+
+ // Assume all package inputs are 0:0:x
+ for (uint32_t i = 0; i < _model_edges->pkg_inputs.size(); i++)
+ {
+ auto input = _model_edges->pkg_inputs[i];
+ if ((std::get<ir::ModelIndex>(input) != ir::ModelIndex{0}) ||
+ (std::get<ir::SubgraphIndex>(input) != ir::SubgraphIndex{0}) ||
+ (std::get<ir::IOIndex>(input) != ir::IOIndex{i}))
+ {
+ throw std::runtime_error{"NYI: Support package input to 1st model with same order"};
+ }
+ }
+
+ // Assume all package outputs are 1:0:x
+ for (uint32_t i = 0; i < _model_edges->pkg_outputs.size(); i++)
+ {
+ auto output = _model_edges->pkg_outputs[i];
+ if ((std::get<ir::ModelIndex>(output) != ir::ModelIndex{1}) ||
+ (std::get<ir::SubgraphIndex>(output) != ir::SubgraphIndex{0}) ||
+ (std::get<ir::IOIndex>(output) != ir::IOIndex{i}))
+ {
+ throw std::runtime_error{"NYI: Support package output from 2nd model with same order"};
+ }
+ }
+
+ const auto &executor1 = _executors.at(ir::SubgraphIndex{0});
+ const auto &graph1 = executor1->graph();
+ const auto &executor2 = _executors.at(ir::SubgraphIndex{1});
+ const auto &graph2 = executor2->graph();
+
+ if ((graph1.getInputs().size() != _model_edges->pkg_inputs.size()) ||
+ (graph2.getOutputs().size() != _model_edges->pkg_outputs.size()) ||
+ (graph1.getOutputs().size() != graph2.getInputs().size()) ||
+ (graph1.getOutputs().size() != _model_edges->edges.size()))
+ {
+ throw std::runtime_error{"NYI: Unsupported model edge pattern"};
+ }
+
+ // Prepare buffer
+ // Assume buffer layout is NHWC
+ std::vector<std::unique_ptr<uint8_t[]>> bufs(_model_edges->edges.size());
+ std::vector<const ir::OperandInfo *> buf_infos(_model_edges->edges.size());
+ const auto layout = ir::Layout::NHWC;
+
+ for (uint32_t i = 0; i < graph1.getOutputs().size(); i++)
+ {
+ const auto buf_index =
+ _executors.at(ir::SubgraphIndex{0})->graph().getOutputs().at(ir::IOIndex{i});
+ buf_infos[i] = &_executors.at(ir::SubgraphIndex{0})->graph().operands().at(buf_index).info();
+ const auto buf_size = buf_infos[i]->total_size();
+ bufs[i] = std::make_unique<uint8_t[]>(buf_size);
+ }
+
+ // 1st executor
+ {
+ IODescription desc1;
+ const auto input_size = graph1.getInputs().size();
+ const auto output_size = graph1.getOutputs().size();
+ desc1.inputs.resize(input_size);
+ desc1.outputs.resize(output_size);
+ for (uint32_t i = 0; i < input_size; i++)
+ desc1.inputs[i] = std::make_unique<InputDesc>(*desc.inputs[i].get());
+ for (uint32_t i = 0; i < output_size; i++)
+ desc1.outputs[i] = std::make_unique<OutputDesc>(*buf_infos[i], bufs[i].get(),
+ buf_infos[i]->total_size(), layout);
+
+ executor1->execute(desc1);
+ }
+
+ // 2nd executor
+ {
+ IODescription desc2;
+ const auto input_size = graph2.getInputs().size();
+ const auto output_size = graph2.getOutputs().size();
+ desc2.inputs.resize(input_size);
+ desc2.outputs.resize(output_size);
+ for (uint32_t i = 0; i < input_size; i++)
+ desc2.inputs[i] = std::make_unique<InputDesc>(*buf_infos[i], bufs[i].get(),
+ buf_infos[i]->total_size(), layout);
+ for (uint32_t i = 0; i < output_size; i++)
+ desc2.outputs[i] = std::make_unique<OutputDesc>(*desc.outputs[i].get());
+
+ executor2->execute(desc2);
+ }
+}
+
+} // namespace exec
+} // namespace onert
diff --git a/runtime/onert/core/src/exec/FunctionSequence.cc b/runtime/onert/core/src/exec/FunctionSequence.cc
index df68b1b64..f87c271f7 100644
--- a/runtime/onert/core/src/exec/FunctionSequence.cc
+++ b/runtime/onert/core/src/exec/FunctionSequence.cc
@@ -34,9 +34,7 @@ void FunctionSequence::run()
// Thus, those two bakends cannot reach here.
// Do dynamic shape inference
- auto op_ind = _dynamic_tensor_ctx->op_ind;
- auto &op = _dynamic_tensor_ctx->operations->at(op_ind);
- op.accept(*_dynamic_tensor_ctx->dynamic_shape_inferer);
+ _dynamic_tensor_ctx->op->accept(*_dynamic_tensor_ctx->dynamic_shape_inferer);
for (const auto &function : _functions)
{
diff --git a/runtime/onert/core/src/exec/JSONExecTime.cc b/runtime/onert/core/src/exec/JSONExecTime.cc
index b29216a2f..d149345fd 100644
--- a/runtime/onert/core/src/exec/JSONExecTime.cc
+++ b/runtime/onert/core/src/exec/JSONExecTime.cc
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#include "exec/JSONExecTime.h"
-#include "backend/IConfig.h"
+#include "JSONExecTime.h"
+
#include <fstream>
namespace onert
diff --git a/runtime/onert/core/src/exec/LinearExecutor.h b/runtime/onert/core/src/exec/LinearExecutor.h
index 39d653154..a833466da 100644
--- a/runtime/onert/core/src/exec/LinearExecutor.h
+++ b/runtime/onert/core/src/exec/LinearExecutor.h
@@ -22,11 +22,10 @@
#ifndef __ONERT_EXEC_EXECUTOR_H_
#define __ONERT_EXEC_EXECUTOR_H_
-#include "ir/Index.h"
#include "ExecutorBase.h"
-#include "compiler/Linear.h"
-#include "exec/FunctionSequence.h"
+
#include "compiler/CodeMap.h"
+#include "ir/Index.h"
#include "util/TracingCtx.h"
namespace onert
diff --git a/runtime/onert/core/src/exec/ParallelExecutor.h b/runtime/onert/core/src/exec/ParallelExecutor.h
index 7f107fa22..7d459b0b4 100644
--- a/runtime/onert/core/src/exec/ParallelExecutor.h
+++ b/runtime/onert/core/src/exec/ParallelExecutor.h
@@ -17,19 +17,13 @@
#ifndef __ONERT_EXEC_PARALLEL_EXECUTOR_H__
#define __ONERT_EXEC_PARALLEL_EXECUTOR_H__
-#include <list>
-#include <queue>
-#include <unordered_map>
-
-#include "exec/FunctionSequence.h"
-#include "Job.h"
-#include "ir/OperandIndexSequence.h"
-#include "ir/Index.h"
-#include <memory>
-#include "exec/DataflowExecutor.h"
+#include "DataflowExecutor.h"
#include "ParallelScheduler.h"
+
#include "util/TracingCtx.h"
+#include <memory>
+
namespace onert
{
namespace exec
diff --git a/runtime/onert/core/src/exec/feature/MockTensor.h b/runtime/onert/core/src/exec/feature/MockTensor.h
new file mode 100644
index 000000000..1d2d375e2
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/MockTensor.h
@@ -0,0 +1,66 @@
+
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "backend/ITensor.h"
+
+template <typename T> class MockTensor : public onert::backend::ITensor
+{
+public:
+ MockTensor<T>(onert::ir::Shape &shape, T *buf, onert::ir::Layout layout)
+ : _buf(reinterpret_cast<uint8_t *>(buf)), _shape(shape), _layout(layout)
+ {
+ }
+
+public:
+ uint8_t *buffer() const override { return _buf; }
+
+ size_t calcOffset(const onert::ir::Coordinates &coords) const override
+ {
+ size_t rank = _shape.rank();
+ rank = rank == 0 ? 1 : rank;
+ size_t offset = 0;
+ for (size_t i = 0; i < rank; ++i)
+ {
+ auto dim = _shape.rank() == 0 ? 1 : _shape.dim(i);
+ offset = offset * dim + coords[i];
+ }
+ offset *= sizeof(T);
+
+ return offset;
+ }
+
+ onert::ir::Shape getShape() const override { return _shape; }
+
+public: // DUMMY methods
+ size_t total_size() const override { return 0; }
+ onert::ir::Layout layout() const override { return _layout; }
+ onert::ir::DataType data_type() const override { return onert::ir::DataType::UINT8; }
+ float data_scale() const override { return 0; }
+ int32_t data_zero_point() const override { return 0; }
+ const std::vector<float> &data_scales() const override { return _dummy_scales; }
+ const std::vector<int32_t> &data_zero_points() const override { return _dummy_zerops; }
+ bool has_padding() const override { return false; }
+ void access(const std::function<void(ITensor &tensor)> &fn) override {}
+ bool is_dynamic() const override { return false; }
+
+private:
+ uint8_t *_buf = nullptr;
+ onert::ir::Shape _shape;
+ onert::ir::Layout _layout = onert::ir::Layout::UNKNOWN;
+ std::vector<float> _dummy_scales;
+ std::vector<int32_t> _dummy_zerops;
+};
diff --git a/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc b/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc
new file mode 100644
index 000000000..f439cafb5
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/nchw/Reader.test.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Reader.h"
+
+#include "../MockTensor.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::exec::feature;
+
+template <typename T> class Reader_nchw : public testing::Test
+{
+public:
+ void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); }
+
+ void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ _shape = onert::ir::FeatureShape(batch, depth, height, width);
+ }
+
+ void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ auto elem_size = sizeof(T);
+ _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size,
+ width * elem_size);
+ }
+
+ void createReader()
+ {
+ _reader =
+ std::make_shared<nchw::Reader<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T));
+ }
+
+ void createUsingMockTensor()
+ {
+ onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C};
+ _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NCHW);
+ _reader = std::make_shared<nchw::Reader<T>>(_tensor.get());
+ }
+
+ std::shared_ptr<Reader<T>> _reader = nullptr;
+
+private:
+ std::shared_ptr<std::vector<T>> _data = nullptr;
+ onert::ir::FeatureShape _shape;
+ onert::ir::FeatureShape _stride;
+ std::shared_ptr<MockTensor<T>> _tensor = nullptr;
+};
+
+using ReaderTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>;
+TYPED_TEST_SUITE(Reader_nchw, ReaderTypes);
+
+TYPED_TEST(Reader_nchw, basic_reader)
+{
+ this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ this->setShape(1, 2, 3, 2);
+ this->setStride(12, 6, 2, 1);
+ this->createReader();
+
+ // Data: NCHW
+ // Shape: NCHW
+ ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 8);
+ ASSERT_EQ(this->_reader->at(1, 1, 0), 8);
+
+ // Data: NCHW
+ // Shape: NCHW
+ this->createUsingMockTensor();
+
+ ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 6);
+ ASSERT_EQ(this->_reader->at(1, 1, 0), 6);
+}
diff --git a/runtime/onert/core/src/exec/feature/nchw/View.test.cc b/runtime/onert/core/src/exec/feature/nchw/View.test.cc
new file mode 100644
index 000000000..c6dcda710
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/nchw/View.test.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "View.h"
+
+#include "../MockTensor.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::exec::feature;
+
+template <typename T> class View_nchw : public testing::Test
+{
+public:
+ void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); }
+
+ void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ _shape = onert::ir::FeatureShape(batch, depth, height, width);
+ }
+
+ void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ auto elem_size = sizeof(T);
+ _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size,
+ width * elem_size);
+ }
+
+ void createView()
+ {
+ _view =
+ std::make_shared<nchw::View<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T));
+ }
+
+ void createUsingMockTensor()
+ {
+ onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C};
+ _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NCHW);
+ _view = std::make_shared<nchw::View<T>>(_tensor.get());
+ }
+
+ std::shared_ptr<nchw::View<T>> _view = nullptr;
+
+private:
+ std::shared_ptr<std::vector<T>> _data = nullptr;
+ onert::ir::FeatureShape _shape;
+ onert::ir::FeatureShape _stride;
+ std::shared_ptr<MockTensor<T>> _tensor = nullptr;
+};
+
+using ViewTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>;
+TYPED_TEST_SUITE(View_nchw, ViewTypes);
+
+TYPED_TEST(View_nchw, basic_view)
+{
+ this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ this->setShape(1, 2, 3, 2);
+ this->setStride(12, 6, 2, 1);
+ this->createView();
+
+ // Data: NCHW
+ // Shape: NCHW
+ ASSERT_EQ(this->_view->at(0, 1, 1, 0), 8);
+ ASSERT_EQ(this->_view->at(1, 1, 0), 8);
+
+ // Data: NCHW
+ // Shape: NCHW
+ this->createUsingMockTensor();
+
+ ASSERT_EQ(this->_view->at(0, 1, 1, 0), 6);
+ ASSERT_EQ(this->_view->at(1, 1, 0), 6);
+}
diff --git a/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc b/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc
new file mode 100644
index 000000000..773199042
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/nhwc/Reader.test.cc
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Reader.h"
+
+#include "../MockTensor.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::exec::feature;
+
+template <typename T> class Reader_nhwc : public testing::Test
+{
+public:
+ void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); }
+
+ void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ _shape = onert::ir::FeatureShape(batch, depth, height, width);
+ }
+
+ void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ auto elem_size = sizeof(T);
+ _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size,
+ width * elem_size);
+ }
+
+ void createReader()
+ {
+ _reader =
+ std::make_shared<nhwc::Reader<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T));
+ }
+
+ void createUsingMockTensor()
+ {
+ onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C};
+ _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NHWC);
+ _reader = std::make_shared<nhwc::Reader<T>>(_tensor.get());
+ }
+
+ std::shared_ptr<nhwc::Reader<T>> _reader = nullptr;
+
+private:
+ std::shared_ptr<std::vector<T>> _data = nullptr;
+ onert::ir::FeatureShape _shape;
+ onert::ir::FeatureShape _stride;
+ std::shared_ptr<MockTensor<T>> _tensor = nullptr;
+};
+
+using ReaderTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>;
+TYPED_TEST_SUITE(Reader_nhwc, ReaderTypes);
+
+
+TYPED_TEST(Reader_nhwc, basic_reader)
+{
+ this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ this->setShape(1, 2, 3, 2);
+ this->setStride(12, 1, 6, 2);
+ this->createReader();
+
+ // Data: NCHW
+ // Shape: NHWC
+ ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 8);
+ ASSERT_EQ(this->_reader->at(1, 1, 0), 8);
+
+ // Data: NHWC
+ // Shape: NHWC
+ this->createUsingMockTensor();
+
+ ASSERT_EQ(this->_reader->at(0, 1, 1, 0), 6);
+ ASSERT_EQ(this->_reader->at(1, 1, 0), 6);
+}
diff --git a/runtime/onert/core/src/exec/feature/nhwc/View.h b/runtime/onert/core/src/exec/feature/nhwc/View.h
index 40d1d237c..c98d050c3 100644
--- a/runtime/onert/core/src/exec/feature/nhwc/View.h
+++ b/runtime/onert/core/src/exec/feature/nhwc/View.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_EXEC_FEATURE_NHWC_VIEW_H__
#define __ONERT_EXEC_FEATURE_NHWC_VIEW_H__
-#include "../Reader.h"
+#include "Reader.h"
#include <cassert>
#include <cstddef>
diff --git a/runtime/onert/core/src/exec/feature/nhwc/View.test.cc b/runtime/onert/core/src/exec/feature/nhwc/View.test.cc
new file mode 100644
index 000000000..bdd73d5a7
--- /dev/null
+++ b/runtime/onert/core/src/exec/feature/nhwc/View.test.cc
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "View.h"
+
+#include "../MockTensor.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::exec::feature;
+
+template <typename T> class View_nhwc : public testing::Test
+{
+public:
+ void setData(std::initializer_list<T> list) { _data = std::make_shared<std::vector<T>>(list); }
+
+ void setShape(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ _shape = onert::ir::FeatureShape(batch, depth, height, width);
+ }
+
+ void setStride(int32_t batch, int32_t depth, int32_t height, int32_t width)
+ {
+ auto elem_size = sizeof(T);
+ _stride = onert::ir::FeatureShape(batch * elem_size, depth * elem_size, height * elem_size,
+ width * elem_size);
+ }
+
+ void createView()
+ {
+ _view =
+ std::make_shared<nhwc::View<T>>(_shape, _stride, _data->data(), _data->size() * sizeof(T));
+ }
+
+ void createUsingMockTensor()
+ {
+ onert::ir::Shape shape = {_shape.N, _shape.H, _shape.W, _shape.C};
+ _tensor = std::make_shared<MockTensor<T>>(shape, _data->data(), onert::ir::Layout::NHWC);
+ _view = std::make_shared<nhwc::View<T>>(_tensor.get());
+ }
+
+ std::shared_ptr<nhwc::View<T>> _view = nullptr;
+
+private:
+ std::shared_ptr<std::vector<T>> _data = nullptr;
+ onert::ir::FeatureShape _shape;
+ onert::ir::FeatureShape _stride;
+ std::shared_ptr<MockTensor<T>> _tensor = nullptr;
+};
+
+using ViewTypes = ::testing::Types<float, int32_t, uint8_t, int8_t, int16_t>;
+TYPED_TEST_SUITE(View_nhwc, ViewTypes);
+
+
+TYPED_TEST(View_nhwc, basic_view)
+{
+ this->setData({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11});
+ this->setShape(1, 2, 3, 2);
+ this->setStride(12, 1, 6, 2);
+ this->createView();
+
+ // Data: NCHW
+ // Shape: NHWC
+ ASSERT_EQ(this->_view->at(0, 1, 1, 0), 8);
+ ASSERT_EQ(this->_view->at(1, 1, 0), 8);
+
+ // Data: NHWC
+ // Shape: NHWC
+ this->createUsingMockTensor();
+
+ ASSERT_EQ(this->_view->at(0, 1, 1, 0), 6);
+ ASSERT_EQ(this->_view->at(1, 1, 0), 6);
+}
diff --git a/runtime/onert/core/src/interp/InterpExecutor.cc b/runtime/onert/core/src/interp/InterpExecutor.cc
index 44d1575d7..f04777174 100644
--- a/runtime/onert/core/src/interp/InterpExecutor.cc
+++ b/runtime/onert/core/src/interp/InterpExecutor.cc
@@ -14,9 +14,10 @@
* limitations under the License.
*/
-#include "interp/InterpExecutor.h"
-#include "interp/ExecEnv.h"
-#include "interp/Interpreter.h"
+#include "InterpExecutor.h"
+
+#include "ExecEnv.h"
+#include "Interpreter.h"
#include "util/logging.h"
diff --git a/runtime/onert/core/src/interp/InterpExecutor.h b/runtime/onert/core/src/interp/InterpExecutor.h
index df6153d09..d6d5dd0a3 100644
--- a/runtime/onert/core/src/interp/InterpExecutor.h
+++ b/runtime/onert/core/src/interp/InterpExecutor.h
@@ -74,7 +74,12 @@ public:
}
private:
- const ir::Graph &_graph;
+ /**
+ * @brief Copy of target graph for lowering
+ * @note It uses copy of graph, not reference.
+ * Original graph may be deallocated by frontend.
+ */
+ const ir::Graph _graph;
ir::OperandIndexMap<std::shared_ptr<ITensor>> _tensor_map;
};
diff --git a/runtime/onert/core/src/interp/InterpExecutor.test.cc b/runtime/onert/core/src/interp/InterpExecutor.test.cc
new file mode 100644
index 000000000..9f95ffee0
--- /dev/null
+++ b/runtime/onert/core/src/interp/InterpExecutor.test.cc
@@ -0,0 +1,355 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "InterpExecutor.h"
+
+#include "exec/Execution.h"
+#include "ir/Graph.h"
+#include "ir/operation/BinaryArithmetic.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+namespace
+{
+
+using namespace onert::ir;
+using InterpExecutor = onert::interp::InterpExecutor;
+using Execution = onert::exec::Execution;
+using Executors = onert::exec::Executors;
+
+class InterpExecutorTest : public ::testing::Test
+{
+protected:
+ virtual void SetUp() {}
+ void CreateSimpleModel()
+ {
+ // Model: one elementwise add operation
+ // model input: lhs, rhs
+ // model output: add result
+ // lhs, rhs, result shape: {1, 2, 2, 1}
+ // activation: none (constant)
+ _graph = std::make_unique<Graph>();
+
+ // Add operands
+
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::INT32};
+ Shape shape_scalar(0);
+ TypeInfo type_scalar{DataType::INT32};
+
+ auto operand_lhs = _graph->addOperand(shape, type);
+ auto operand_rhs = _graph->addOperand(shape, type);
+ auto operand_result = _graph->addOperand(shape, type);
+
+ // Add operations
+
+ operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param.activation = Activation::NONE;
+ auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
+ auto output_set = OperandIndexSequence{operand_result};
+ _graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
+
+ // Identify model inputs and outputs
+
+ _graph->getInputs().append(operand_lhs);
+ _graph->getInputs().append(operand_rhs);
+ _graph->getOutputs().append(operand_result);
+
+ _graph->verify();
+
+ auto model = std::make_shared<onert::ir::Model>();
+ model->push(onert::ir::SubgraphIndex{0}, _graph);
+
+ _executors = std::make_shared<Executors>();
+ _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph));
+ }
+
+ void CreateTwoStepModel()
+ {
+ // Model: two elementwise add operation
+ // model input: lhs, rhs1
+ // model output: second add result (result2)
+ // constant: rhs2
+ // result1 <= (lhs + rhs)
+ // result2 <= (result1 + rhs2)
+ // lhs, rhs1, rh2, result1, result2 shape: {1, 2, 2, 1}
+ // activation: none (constant)
+ _graph = std::make_unique<Graph>();
+
+ // 1st add operands (result1 <= lhs + rhs1)
+
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::INT32};
+ Shape shape_scalar(0);
+ TypeInfo type_scalar{DataType::INT32};
+
+ static int32_t rhs2_data[4] = {3, 1, -1, 5};
+
+ auto operand_lhs = _graph->addOperand(shape, type);
+ auto operand_rhs1 = _graph->addOperand(shape, type);
+ auto operand_result1 = _graph->addOperand(shape, type);
+ auto operand_rhs2 = _graph->addOperand(shape, type);
+ auto operand_result2 = _graph->addOperand(shape, type);
+ _graph->operands()
+ .at(operand_rhs2)
+ .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
+
+ // 2nd add operations (result2 <= result1 + rhs2)
+
+ operation::BinaryArithmetic::Param param1;
+ param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param1.activation = Activation::NONE;
+ auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
+ auto output_set1 = OperandIndexSequence{operand_result1};
+ _graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
+
+ operation::BinaryArithmetic::Param param2;
+ param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param2.activation = Activation::NONE;
+ auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
+ auto output_set2 = OperandIndexSequence{operand_result2};
+ _graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
+
+ // Identify model inputs and outputs
+
+ _graph->getInputs().append(operand_lhs);
+ _graph->getInputs().append(operand_rhs1);
+ _graph->getOutputs().append(operand_result2);
+
+ _graph->verify();
+
+ auto model = std::make_shared<onert::ir::Model>();
+ model->push(onert::ir::SubgraphIndex{0}, _graph);
+
+ _executors = std::make_shared<Executors>();
+ _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph));
+ }
+
+ void CreateUnspecifiedDimensionsModel()
+ {
+ // Model: one elementwise add operation
+ // model input: lhs, rhs
+ // model output: add result
+ // lhs, rhs, result shape: {1, unknown, 2, 1}
+ // activation: none (constant)
+ _graph = std::make_unique<Graph>();
+
+ // Add operands
+
+ Shape shape{1, 0, 2, 1};
+ TypeInfo type{DataType::INT32};
+ Shape shape_scalar(0);
+ TypeInfo type_scalar{DataType::INT32};
+
+ auto operand_lhs = _graph->addOperand(shape, type);
+ auto operand_rhs = _graph->addOperand(shape, type);
+
+ auto operand_activation = _graph->addOperand(shape_scalar, type_scalar);
+ _graph->operands()
+ .at(operand_activation)
+ .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&_activation_value), 4));
+
+ auto operand_result = _graph->addOperand(shape, type);
+
+ // Add operations
+
+ operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param.activation = Activation::NONE;
+ auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
+ auto output_set = OperandIndexSequence{operand_result};
+ _graph->addOperation(
+ std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
+
+ // Identify model inputs and outputs
+
+ _graph->getInputs().append(operand_lhs);
+ _graph->getInputs().append(operand_rhs);
+ _graph->getOutputs().append(operand_result);
+
+ _graph->verify();
+
+ auto model = std::make_shared<onert::ir::Model>();
+ model->push(onert::ir::SubgraphIndex{0}, _graph);
+
+ _executors = std::make_shared<Executors>();
+ _executors->emplace(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph));
+ }
+
+ void createExecution() { _execution = std::make_unique<Execution>(_executors); }
+
+ virtual void TearDown() { _executors = nullptr; }
+
+ std::shared_ptr<Graph> _graph{nullptr};
+ std::shared_ptr<Executors> _executors{nullptr};
+ std::unique_ptr<Execution> _execution{nullptr};
+ const int32_t _activation_value{0};
+};
+
+TEST_F(InterpExecutorTest, create_empty)
+{
+ Graph graph;
+ graph.verify();
+ auto executor = std::make_unique<InterpExecutor>(graph);
+ ASSERT_NE(executor, nullptr);
+}
+
+TEST_F(InterpExecutorTest, create_simple)
+{
+ CreateSimpleModel();
+ ASSERT_NE(_executors, nullptr);
+ ASSERT_NE(_executors->at(onert::ir::SubgraphIndex{0}), nullptr);
+}
+
+TEST_F(InterpExecutorTest, neg_setInput)
+{
+ CreateSimpleModel();
+ createExecution();
+
+ auto input1 = IOIndex{0};
+ const int32_t input1_buffer[4] = {1, 0, -1, -2};
+
+ EXPECT_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 4),
+ std::runtime_error);
+ EXPECT_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 12),
+ std::runtime_error);
+ EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
+}
+
+TEST_F(InterpExecutorTest, neg_setOutput)
+{
+ CreateSimpleModel();
+ createExecution();
+
+ auto output = IOIndex{0};
+ auto output_idx = _graph->getOutputs().at(output);
+
+ int32_t output_buffer[4] = {};
+
+ EXPECT_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 4),
+ std::runtime_error);
+ EXPECT_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 12),
+ std::runtime_error);
+ EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
+}
+
+TEST_F(InterpExecutorTest, neg_setInputForUnspecifiedDimensions)
+{
+ CreateUnspecifiedDimensionsModel();
+ createExecution();
+
+ auto input1 = IOIndex{0};
+ const int32_t input1_buffer[4] = {1, 0, -1, -2};
+
+ TypeInfo operand_type{DataType::INT32};
+ Shape operand_shape{1, 2, 2, 1};
+
+ EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape,
+ reinterpret_cast<const void *>(input1_buffer), 4),
+ std::runtime_error);
+ EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape,
+ reinterpret_cast<const void *>(input1_buffer), 12),
+ std::runtime_error);
+ EXPECT_NO_THROW(_execution->setInput(input1, operand_type, operand_shape,
+ reinterpret_cast<const void *>(input1_buffer), 16));
+}
+
+TEST_F(InterpExecutorTest, neg_setOutputForUnspecifiedDimensions)
+{
+ CreateUnspecifiedDimensionsModel();
+ createExecution();
+
+ auto output = IOIndex{0};
+ auto output_idx = _graph->getOutputs().at(output);
+
+ TypeInfo operand_type{DataType::INT32};
+ Shape operand_shape{1, 2, 2, 1};
+
+ int32_t output_buffer[4] = {};
+
+ EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape,
+ reinterpret_cast<void *>(output_buffer), 4),
+ std::runtime_error);
+ EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape,
+ reinterpret_cast<void *>(output_buffer), 12),
+ std::runtime_error);
+ EXPECT_NO_THROW(_execution->setOutput(output, operand_type, operand_shape,
+ reinterpret_cast<void *>(output_buffer), 16));
+}
+
+TEST_F(InterpExecutorTest, execute)
+{
+ CreateSimpleModel();
+ createExecution();
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto input1_idx = _graph->getInputs().at(input1);
+ auto input2_idx = _graph->getInputs().at(input2);
+
+ const int32_t input1_buffer[4] = {1, 0, -1, -2};
+ const int32_t input2_buffer[4] = {1, -3, 2, -4};
+
+ auto output = IOIndex{0};
+ auto output_idx = _graph->getOutputs().at(output);
+
+ int32_t output_buffer[4] = {};
+
+ EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
+ EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16));
+ EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
+ EXPECT_NO_THROW(_execution->execute());
+ EXPECT_EQ(output_buffer[0], 2);
+ EXPECT_EQ(output_buffer[1], -3);
+ EXPECT_EQ(output_buffer[2], 1);
+ EXPECT_EQ(output_buffer[3], -6);
+}
+
+TEST_F(InterpExecutorTest, executeTwoStep)
+{
+ CreateTwoStepModel();
+ createExecution();
+
+ auto input1 = IOIndex{0};
+ auto input2 = IOIndex{1};
+ auto input1_idx = _graph->getInputs().at(input1);
+ auto input2_idx = _graph->getInputs().at(input2);
+
+ const int32_t input1_buffer[4] = {1, 0, -1, -2};
+ const int32_t input2_buffer[4] = {1, -3, 2, -4};
+
+ auto output = IOIndex{0};
+ auto output_idx = _graph->getOutputs().at(output);
+
+ int32_t output_buffer[4] = {};
+
+ EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
+ EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16));
+ EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
+ EXPECT_NO_THROW(_execution->execute());
+ EXPECT_EQ(output_buffer[0], 5);
+ EXPECT_EQ(output_buffer[1], -2);
+ EXPECT_EQ(output_buffer[2], 0);
+ EXPECT_EQ(output_buffer[3], -1);
+}
+
+} // namespace
diff --git a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
index 804e9fb51..fe4acd309 100644
--- a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
+++ b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc
@@ -14,14 +14,14 @@
* limitations under the License.
*/
-#include <cker/operation/BinaryArithmeticOps.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/BinaryArithmetic.h"
-#include "misc/polymorphic_downcast.h"
-#include "cker/Types.h"
+
+#include <cker/operation/BinaryArithmeticOps.h>
+#include <cker/Types.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/Concat.cc b/runtime/onert/core/src/interp/operations/Concat.cc
index a063ab14a..103604631 100644
--- a/runtime/onert/core/src/interp/operations/Concat.cc
+++ b/runtime/onert/core/src/interp/operations/Concat.cc
@@ -14,13 +14,13 @@
* limitations under the License.
*/
-#include <cker/operation/Concatenation.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/Concat.h"
-#include "misc/polymorphic_downcast.h"
+
+#include <cker/operation/Concatenation.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/Conv2D.cc b/runtime/onert/core/src/interp/operations/Conv2D.cc
index 0b43a4799..72c2057c2 100644
--- a/runtime/onert/core/src/interp/operations/Conv2D.cc
+++ b/runtime/onert/core/src/interp/operations/Conv2D.cc
@@ -14,15 +14,15 @@
* limitations under the License.
*/
-#include <cker/operation/Conv.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/Conv2D.h"
-#include "util/Utils.h"
#include "util/ShapeInference.h"
-#include "misc/polymorphic_downcast.h"
+#include "util/Utils.h"
+
+#include <cker/operation/Conv.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc
index d1c62d73f..9f527440e 100644
--- a/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc
+++ b/runtime/onert/core/src/interp/operations/DepthwiseConv2D.cc
@@ -14,15 +14,15 @@
* limitations under the License.
*/
-#include <cker/operation/DepthwiseConv.h>
-#include <misc/polymorphic_downcast.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/DepthwiseConv2D.h"
-#include "util/Utils.h"
#include "util/ShapeInference.h"
+#include "util/Utils.h"
+
+#include <cker/operation/DepthwiseConv.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc b/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc
index 197855ff4..e13080e76 100644
--- a/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc
+++ b/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc
@@ -14,17 +14,16 @@
* limitations under the License.
*/
-#include <cmath>
-
#include "OperationUtil.h"
-
-#include "interp/Registration.h"
+#include "../Registration.h"
#include "ir/operation/ElementwiseActivation.h"
-#include <misc/polymorphic_downcast.h>
#include <cker/operation/Logistic.h>
#include <cker/operation/Tanh.h>
+#include <misc/polymorphic_downcast.h>
+
+#include <cmath>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/FullyConnected.cc b/runtime/onert/core/src/interp/operations/FullyConnected.cc
index ef827605b..2bc9f517f 100644
--- a/runtime/onert/core/src/interp/operations/FullyConnected.cc
+++ b/runtime/onert/core/src/interp/operations/FullyConnected.cc
@@ -14,13 +14,13 @@
* limitations under the License.
*/
-#include <cker/operation/FullyConnected.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/FullyConnected.h"
-#include "misc/polymorphic_downcast.h"
+
+#include <cker/operation/FullyConnected.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/Gather.cc b/runtime/onert/core/src/interp/operations/Gather.cc
index 0ea60875c..d686cfcf6 100644
--- a/runtime/onert/core/src/interp/operations/Gather.cc
+++ b/runtime/onert/core/src/interp/operations/Gather.cc
@@ -14,13 +14,13 @@
* limitations under the License.
*/
-#include <cker/operation/Gather.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/Gather.h"
-#include "misc/polymorphic_downcast.h"
+
+#include <cker/operation/Gather.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/InstanceNorm.cc b/runtime/onert/core/src/interp/operations/InstanceNorm.cc
index b5c38819d..318088457 100644
--- a/runtime/onert/core/src/interp/operations/InstanceNorm.cc
+++ b/runtime/onert/core/src/interp/operations/InstanceNorm.cc
@@ -14,13 +14,13 @@
* limitations under the License.
*/
-#include <cker/operation/InstanceNorm.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/InstanceNorm.h"
-#include "misc/polymorphic_downcast.h"
+
+#include <cker/operation/InstanceNorm.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/Pad.cc b/runtime/onert/core/src/interp/operations/Pad.cc
index 0eec7fe9a..3db0828eb 100644
--- a/runtime/onert/core/src/interp/operations/Pad.cc
+++ b/runtime/onert/core/src/interp/operations/Pad.cc
@@ -14,13 +14,13 @@
* limitations under the License.
*/
-#include <cker/operation/Pad.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/Pad.h"
+#include <cker/operation/Pad.h>
+
namespace onert
{
namespace interp
diff --git a/runtime/onert/core/src/interp/operations/Pool2D.cc b/runtime/onert/core/src/interp/operations/Pool2D.cc
index 2f3b71655..3935d4756 100644
--- a/runtime/onert/core/src/interp/operations/Pool2D.cc
+++ b/runtime/onert/core/src/interp/operations/Pool2D.cc
@@ -14,16 +14,16 @@
* limitations under the License.
*/
-#include <cker/operation/AveragePool.h>
-#include <cker/operation/MaxPool.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/Pool2D.h"
-#include "util/Utils.h"
#include "util/ShapeInference.h"
-#include "misc/polymorphic_downcast.h"
+#include "util/Utils.h"
+
+#include <cker/operation/AveragePool.h>
+#include <cker/operation/MaxPool.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/Reshape.cc b/runtime/onert/core/src/interp/operations/Reshape.cc
index 3a118456b..1de5a5762 100644
--- a/runtime/onert/core/src/interp/operations/Reshape.cc
+++ b/runtime/onert/core/src/interp/operations/Reshape.cc
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "interp/Registration.h"
+#include "../Registration.h"
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/Softmax.cc b/runtime/onert/core/src/interp/operations/Softmax.cc
index 1fc303117..8be2f2210 100644
--- a/runtime/onert/core/src/interp/operations/Softmax.cc
+++ b/runtime/onert/core/src/interp/operations/Softmax.cc
@@ -14,13 +14,13 @@
* limitations under the License.
*/
-#include <cker/operation/SoftMax.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/Softmax.h"
-#include "misc/polymorphic_downcast.h"
+
+#include <cker/operation/SoftMax.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
diff --git a/runtime/onert/core/src/interp/operations/TransposeConv.cc b/runtime/onert/core/src/interp/operations/TransposeConv.cc
index 755103dc2..59c8e8cdf 100644
--- a/runtime/onert/core/src/interp/operations/TransposeConv.cc
+++ b/runtime/onert/core/src/interp/operations/TransposeConv.cc
@@ -14,14 +14,14 @@
* limitations under the License.
*/
-#include <cker/operation/TransposeConv.h>
-#include <misc/polymorphic_downcast.h>
-
#include "OperationUtil.h"
+#include "../Registration.h"
-#include "interp/Registration.h"
#include "ir/operation/TransposeConv.h"
+#include <cker/operation/TransposeConv.h>
+#include <misc/polymorphic_downcast.h>
+
namespace onert
{
namespace interp
diff --git a/runtime/onert/core/src/ir/Graph.cc b/runtime/onert/core/src/ir/Graph.cc
index df30bbdbe..28cf4137d 100644
--- a/runtime/onert/core/src/ir/Graph.cc
+++ b/runtime/onert/core/src/ir/Graph.cc
@@ -17,19 +17,9 @@
#include "ir/Graph.h"
#include "OperationValidator.h"
+#include "verifier/Verifier.h"
-#include <algorithm>
-
-#include <bitset>
-#include <sstream>
-
-#include "util/logging.h"
#include "util/Set.h"
-#include "verifier/Verifier.h"
-#include "ir/OperandIndexMap.h"
-#include "ir/OperationIndexMap.h"
-#include "dumper/text/GraphDumper.h"
-#include "backend/IConfig.h"
namespace onert
{
@@ -38,6 +28,8 @@ namespace ir
Graph::Graph() = default;
+Graph::Graph(const Graph &) = default;
+
Graph::~Graph(void) = default;
OperandIndex Graph::addOperand(const Shape &shape, const TypeInfo &type)
diff --git a/runtime/onert/core/src/ir/Graph.test.cc b/runtime/onert/core/src/ir/Graph.test.cc
new file mode 100644
index 000000000..144500745
--- /dev/null
+++ b/runtime/onert/core/src/ir/Graph.test.cc
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Graph.h"
+#include "ir/operation/BinaryArithmetic.h"
+
+#include <gtest/gtest.h>
+
+TEST(Graph, neg_inputs_and_outputs)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::OperandIndex index0{0u};
+ onert::ir::OperandIndex index1{1u};
+
+ graph.addInput({index0});
+ graph.addInput({index1});
+
+ onert::ir::OperandIndex index10{10u};
+ onert::ir::OperandIndex index11{11u};
+ onert::ir::OperandIndex index12{12u};
+
+ graph.addOutput({index10});
+ graph.addOutput({index11});
+ graph.addOutput({index12});
+
+ ASSERT_EQ(graph.getInputs().size(), 2);
+ ASSERT_EQ(graph.getOutputs().size(), 3);
+
+ onert::ir::IOIndex io_index0{0};
+ onert::ir::IOIndex io_index1{1};
+ onert::ir::IOIndex io_index2{2};
+
+ ASSERT_EQ(graph.getInputs().at(io_index0), 0);
+ ASSERT_EQ(graph.getInputs().at(io_index1), 1);
+
+ ASSERT_EQ(graph.getOutputs().at(io_index0), 10);
+ ASSERT_EQ(graph.getOutputs().at(io_index1), 11);
+ ASSERT_EQ(graph.getOutputs().at(io_index2), 12);
+
+ EXPECT_THROW(graph.getOutputs().at(onert::ir::IOIndex{3}), std::out_of_range);
+}
+
+using namespace onert::ir;
+
+OperationIndex addAddOperation(Graph &graph, const OperandIndexSequence inputs,
+ const OperandIndexSequence outputs)
+{
+ // Add "ADD" operation
+ operation::BinaryArithmetic::Param param;
+ param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
+ param.activation = Activation::NONE;
+ return graph.addOperation(std::make_unique<operation::BinaryArithmetic>(inputs, outputs, param));
+}
+
+TEST(Graph, OneOpGraphSimpleValid)
+{
+ // Simple Graph with just one Add operation
+
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto lhs = graph.addOperand(shape, type);
+ auto rhs = graph.addOperand(shape, type);
+ auto res = graph.addOperand(shape, type);
+
+ addAddOperation(graph, {lhs, rhs}, {res});
+
+ // Set model inputs/outputs
+ graph.addInput(lhs);
+ graph.addInput(rhs);
+ graph.addOutput(res);
+
+ graph.verify();
+
+ SUCCEED();
+}
+
+TEST(Graph, neg_InvalidGraph_BadInput)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto in = graph.addOperand(shape, type);
+ auto out = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(in);
+ graph.addOutput(out);
+ graph.addInput(OperandIndex{89}); // Non-existing operand!
+
+ EXPECT_ANY_THROW(graph.verify());
+}
+
+TEST(Graph, neg_InvalidGraph_BadOutput)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto in = graph.addOperand(shape, type);
+ auto out = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(in);
+ graph.addOutput(out);
+ graph.addOutput(OperandIndex{12}); // Non-existing operand!
+
+ EXPECT_ANY_THROW(graph.verify());
+}
+
+TEST(Graph, neg_InvalidAddOperation_BadInputIndex)
+{
+ Graph graph;
+
+ // Add tensors
+ Shape shape{1, 2, 2, 1};
+ TypeInfo type{DataType::FLOAT32};
+ auto lhs = graph.addOperand(shape, type);
+ auto rhs = graph.addOperand(shape, type);
+ auto res = graph.addOperand(shape, type);
+
+ // Set model inputs/outputs
+ graph.addInput(lhs);
+ graph.addInput(rhs);
+ graph.addOutput(res);
+
+ ASSERT_FALSE(addAddOperation(graph, {lhs, OperandIndex{99}}, {res}).valid());
+}
diff --git a/runtime/onert/core/src/ir/LayoutSet.test.cc b/runtime/onert/core/src/ir/LayoutSet.test.cc
new file mode 100644
index 000000000..fc956abe8
--- /dev/null
+++ b/runtime/onert/core/src/ir/LayoutSet.test.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LayoutSet.h"
+
+#include <gtest/gtest.h>
+
+using onert::ir::Layout;
+using onert::ir::LayoutSet;
+
+TEST(ir_LayoutSet, neg_add_remove)
+{
+ LayoutSet set{Layout::NCHW};
+ set.remove(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+ set.add(Layout::NHWC);
+ ASSERT_EQ(set.size(), 2);
+ set.remove(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+ set.remove(Layout::NCHW);
+ ASSERT_EQ(set.size(), 0);
+ set.remove(Layout::NCHW);
+ ASSERT_EQ(set.size(), 0);
+}
+
+TEST(ir_LayoutSet, neg_add_twice)
+{
+ LayoutSet set;
+ set.add(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+ set.add(Layout::NHWC);
+ ASSERT_EQ(set.size(), 1);
+}
+
+TEST(ir_LayoutSet, set_operators)
+{
+ LayoutSet set1{Layout::NCHW};
+ LayoutSet set2{Layout::NHWC};
+ LayoutSet set3 = set1 | set2;
+
+ ASSERT_EQ(set3.size(), 2);
+
+ ASSERT_EQ((set3 - set1).size(), 1);
+ ASSERT_EQ((set3 - set1).contains(Layout::NHWC), true);
+ ASSERT_EQ((set3 - set2).size(), 1);
+ ASSERT_EQ((set3 - set2).contains(Layout::NCHW), true);
+ ASSERT_EQ((set3 - set3).size(), 0);
+
+ ASSERT_EQ((set3 & set1).size(), 1);
+ ASSERT_EQ((set3 & set1).contains(Layout::NCHW), true);
+ ASSERT_EQ((set3 & set2).size(), 1);
+ ASSERT_EQ((set3 & set2).contains(Layout::NHWC), true);
+ ASSERT_EQ((set1 & set2).size(), 0);
+}
diff --git a/runtime/onert/test/core/ir/MockNode.h b/runtime/onert/core/src/ir/MockNode.h
index 0e7ed977b..0e7ed977b 100644
--- a/runtime/onert/test/core/ir/MockNode.h
+++ b/runtime/onert/core/src/ir/MockNode.h
diff --git a/runtime/onert/core/src/ir/Operand.test.cc b/runtime/onert/core/src/ir/Operand.test.cc
new file mode 100644
index 000000000..0b858792a
--- /dev/null
+++ b/runtime/onert/core/src/ir/Operand.test.cc
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Graph.h"
+
+#include "MockNode.h"
+#include "verifier/Verifier.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <typeindex>
+
+namespace
+{
+
+using IndexSet = onert::ir::OperandIndexSequence;
+using Mock = onert_test::ir::SimpleMock;
+
+} // namespace
+
+TEST(ir_Operand, neg_usedef)
+{
+ onert::ir::Graph graph;
+ onert::ir::verifier::DAGChecker verifier;
+
+ onert::ir::Shape shape(3);
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ // Model Input/Output
+ auto input_operand = graph.addOperand(shape, type);
+ auto output_operand = graph.addOperand(shape, type);
+
+ graph.addInput(input_operand);
+ graph.addOutput(output_operand);
+
+ // MockNode1
+ auto operand_index1 = graph.addOperand(shape, type);
+ auto mocknode_index1 =
+ graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index1}));
+
+ // MockNode2
+ auto operand_index2 = graph.addOperand(shape, type);
+ auto mocknode_index2 =
+ graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index2}));
+
+ // MockNode3(two input)
+ auto multiinput_index = graph.addOperation(
+ std::make_unique<Mock>(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand}));
+
+ graph.verify();
+
+ ASSERT_TRUE(verifier.verify(graph));
+
+ // Check def
+ ASSERT_EQ(graph.operands().at(operand_index1).getDef(), mocknode_index1);
+ ASSERT_EQ(graph.operands().at(operand_index2).getDef(), mocknode_index2);
+ ASSERT_EQ(graph.operands().at(output_operand).getDef(), multiinput_index);
+
+ ASSERT_NE(graph.operands().at(operand_index1).getDef(), mocknode_index2);
+ ASSERT_NE(graph.operands().at(operand_index1).getDef(), multiinput_index);
+
+ // Check use
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index1), true);
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index2), true);
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(multiinput_index), false);
+ ASSERT_EQ(graph.operands().at(operand_index1).getUses().contains(multiinput_index), true);
+ ASSERT_EQ(graph.operands().at(operand_index2).getUses().contains(multiinput_index), true);
+
+ ASSERT_EQ(graph.operands().at(input_operand).getUses().size(), 2);
+ ASSERT_EQ(graph.operands().at(operand_index1).getUses().size(), 1);
+ ASSERT_EQ(graph.operands().at(output_operand).getUses().size(), 0);
+}
diff --git a/runtime/onert/core/src/ir/OperandIndexSequence.test.cc b/runtime/onert/core/src/ir/OperandIndexSequence.test.cc
new file mode 100644
index 000000000..588c4e419
--- /dev/null
+++ b/runtime/onert/core/src/ir/OperandIndexSequence.test.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/OperandIndexSequence.h"
+
+#include <gtest/gtest.h>
+
+using onert::ir::OperandIndex;
+using onert::ir::OperandIndexSequence;
+
+TEST(ir_OperandIndexSequence, neg_append)
+{
+ OperandIndexSequence iset{0, 2, 4, 8};
+
+ ASSERT_EQ(iset.size(), 4);
+
+ iset.append(OperandIndex{10});
+
+ ASSERT_EQ(iset.size(), 5);
+
+ onert::ir::IOIndex index1{1};
+ onert::ir::IOIndex index2{4};
+
+ ASSERT_EQ(iset.at(index1), 2);
+ ASSERT_EQ(iset.at(index2), 10);
+
+ ASSERT_TRUE(iset.contains(OperandIndex{2}));
+ ASSERT_TRUE(iset.contains(OperandIndex{10}));
+ ASSERT_FALSE(iset.contains(OperandIndex{11}));
+}
+
+TEST(graph_OperandIndexSequence, neg_replace)
+{
+ OperandIndexSequence iset{0, 1, 2, 3};
+
+ iset.replace(OperandIndex{1}, OperandIndex{9});
+ ASSERT_FALSE(iset.contains(OperandIndex{1}));
+ ASSERT_TRUE(iset.contains(OperandIndex{9}));
+}
diff --git a/runtime/onert/core/src/ir/Operands.test.cc b/runtime/onert/core/src/ir/Operands.test.cc
new file mode 100644
index 000000000..aff228b10
--- /dev/null
+++ b/runtime/onert/core/src/ir/Operands.test.cc
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Operands.h"
+
+#include <gtest/gtest.h>
+
+TEST(ir_Operands, neg_set_test)
+{
+ onert::ir::Operands set;
+
+ onert::ir::Shape shape0{1, 2, 3};
+
+ onert::ir::Shape shape1(4);
+ shape1.dim(0) = 10;
+ shape1.dim(1) = 20;
+ shape1.dim(2) = 30;
+ shape1.dim(3) = 40;
+
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ set.emplace(shape0, type);
+ set.emplace(shape1, type);
+
+ ASSERT_EQ(set.exist(onert::ir::OperandIndex{0u}), true);
+ ASSERT_EQ(set.exist(onert::ir::OperandIndex{1u}), true);
+ ASSERT_EQ(set.exist(onert::ir::OperandIndex{2u}), false);
+
+ ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(0), 1);
+ ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(1), 2);
+ ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(2), 3);
+}
diff --git a/runtime/onert/core/src/ir/Operation.test.cc b/runtime/onert/core/src/ir/Operation.test.cc
new file mode 100644
index 000000000..b3c4e852d
--- /dev/null
+++ b/runtime/onert/core/src/ir/Operation.test.cc
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Graph.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexSequence.h"
+#include "ir/operation/Concat.h"
+#include "ir/operation/Conv2D.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <stdexcept>
+
+using Index = onert::ir::IOIndex;
+using IndexSet = onert::ir::OperandIndexSequence;
+
+TEST(ir_Operation_setIO, operation_setIO_conv)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ // Add Conv
+ using Graph = onert::ir::operation::Conv2D;
+
+ auto input_operand = graph.addOperand(shape, type);
+ auto kernel_operand = graph.addOperand(shape, type);
+ auto bias_operand = graph.addOperand(shape, type);
+ IndexSet inputs{input_operand, kernel_operand, bias_operand};
+
+ Graph::Param conv_params;
+ conv_params.padding.type = onert::ir::PaddingType::SAME;
+ conv_params.stride.horizontal = 1;
+ conv_params.stride.vertical = 1;
+ conv_params.activation = onert::ir::Activation::NONE;
+
+ auto output_operand = graph.addOperand(shape, type).value();
+ IndexSet outputs{output_operand};
+
+ auto conv = std::make_unique<Graph>(inputs, outputs, conv_params);
+
+ ASSERT_NE(conv, nullptr);
+ ASSERT_EQ(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
+ conv->setInputs({8, 9, 10});
+ ASSERT_NE(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
+ ASSERT_EQ(conv->getInputs().at(Index{0}).value(), 8);
+}
+
+TEST(ir_Operation_setIO, neg_operation_setIO_concat)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ using Graph = onert::ir::operation::Concat;
+
+ // Add Concat
+ IndexSet inputs;
+ for (int i = 0; i < 6; ++i)
+ {
+ inputs.append(graph.addOperand(shape, type));
+ }
+
+ Graph::Param concat_params{0};
+
+ auto output_operand = graph.addOperand(shape, type).value();
+ IndexSet outputs{output_operand};
+
+ auto concat = std::make_unique<Graph>(inputs, outputs, concat_params);
+
+ ASSERT_NE(concat, nullptr);
+ ASSERT_EQ(concat->getInputs().size(), 6);
+ ASSERT_EQ(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
+
+ concat->setInputs({80, 6, 9, 11});
+ ASSERT_EQ(concat->getInputs().size(), 4);
+ ASSERT_NE(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
+ ASSERT_EQ(concat->getInputs().at(Index{0}).value(), 80);
+ ASSERT_EQ(concat->getInputs().at(Index{2}).value(), 9);
+ ASSERT_THROW(concat->getInputs().at(Index{5}), std::out_of_range);
+}
diff --git a/runtime/onert/core/src/ir/Operations.test.cc b/runtime/onert/core/src/ir/Operations.test.cc
new file mode 100644
index 000000000..e57872689
--- /dev/null
+++ b/runtime/onert/core/src/ir/Operations.test.cc
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Operations.h"
+
+#include "MockNode.h"
+
+#include <gtest/gtest.h>
+
+using onert::ir::Operation;
+using onert::ir::OperationIndex;
+using onert::ir::Operations;
+
+TEST(ir_Operations, basic)
+{
+ Operations ops;
+ ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7})));
+ OperationIndex idx{0u};
+ ASSERT_EQ(ops.at(idx).getInputs().size(), 4);
+ ASSERT_EQ(ops.at(idx).getOutputs().size(), 3);
+}
+
+TEST(ir_Operations, neg_at)
+{
+ Operations ops;
+ ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7})));
+ OperationIndex idx{99u};
+ EXPECT_THROW(ops.at(idx), std::out_of_range);
+}
diff --git a/runtime/onert/core/src/ir/Shape.test.cc b/runtime/onert/core/src/ir/Shape.test.cc
new file mode 100644
index 000000000..afdb29254
--- /dev/null
+++ b/runtime/onert/core/src/ir/Shape.test.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/Shape.h"
+
+#include <gtest/gtest.h>
+
+TEST(ShapeTest, basic_test)
+{
+ {
+ onert::ir::Shape shape(3);
+
+ shape.dim(0) = 1;
+ shape.dim(1) = 2;
+ shape.dim(2) = 3;
+
+ ASSERT_EQ(shape.rank(), 3);
+ ASSERT_EQ(shape.num_elements(), 6);
+ ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
+ ASSERT_EQ(shape.hasUnspecifiedDims(), false);
+ }
+ {
+ onert::ir::Shape shape; // scalar or rank is unspecified
+
+ ASSERT_EQ(shape.rank(), 0);
+ ASSERT_EQ(shape.num_elements(), 1);
+ ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), true);
+ ASSERT_EQ(shape.hasUnspecifiedDims(), false);
+ }
+}
+
+TEST(ShapeTest, neg_basic_test)
+{
+ {
+ onert::ir::Shape shape(2);
+
+ shape.dim(0) = 1;
+ shape.dim(1) = onert::ir::Shape::UNSPECIFIED_DIM;
+
+ ASSERT_EQ(shape.rank(), 2);
+ ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
+ ASSERT_EQ(shape.hasUnspecifiedDims(), true);
+ EXPECT_ANY_THROW(shape.num_elements());
+ }
+}
diff --git a/runtime/onert/core/src/ir/verifier/Verifier.test.cc b/runtime/onert/core/src/ir/verifier/Verifier.test.cc
new file mode 100644
index 000000000..1ec71cd55
--- /dev/null
+++ b/runtime/onert/core/src/ir/verifier/Verifier.test.cc
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Verifier.h"
+
+#include "../MockNode.h"
+
+#include "ir/Graph.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+using IndexSet = onert::ir::OperandIndexSequence;
+using Mock = onert_test::ir::SimpleMock;
+
+TEST(Verifier, dag_checker)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ auto operand1 = graph.addOperand(shape, type);
+ auto operand2 = graph.addOperand(shape, type);
+
+ graph.addInput(operand1);
+ graph.addOutput(operand2);
+
+ graph.addOperation(std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2}));
+
+ onert::ir::verifier::DAGChecker verifier;
+
+ ASSERT_TRUE(verifier.verify(graph));
+}
+
+TEST(Verifier, neg_edge_consistency_checker_1)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ auto operand1 = graph.addOperand(shape, type);
+ auto operand2 = graph.addOperand(shape, type);
+
+ graph.addInput(operand1);
+ graph.addOutput(operand2);
+
+ auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2});
+ auto op_ind = graph.addOperation(std::move(mock_op));
+
+ graph.operands().at(operand1).removeUse(op_ind); // Manipulate the operand alone
+
+ onert::ir::verifier::EdgeChecker verifier;
+ ASSERT_FALSE(verifier.verify(graph));
+}
+
+TEST(Verifier, neg_edge_consistency_checker_2)
+{
+ onert::ir::Graph graph;
+
+ onert::ir::Shape shape{3};
+ onert::ir::TypeInfo type{onert::ir::DataType::INT32};
+
+ auto operand1 = graph.addOperand(shape, type);
+ auto operand2 = graph.addOperand(shape, type);
+
+ graph.addInput(operand1);
+ graph.addOutput(operand2);
+
+ auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2});
+ auto mock_op_ptr = mock_op.get();
+ auto op_ind = graph.addOperation(std::move(mock_op));
+
+ mock_op_ptr->setInputs({operand2}); // Manipulate the operation alone
+
+ onert::ir::verifier::EdgeChecker verifier;
+ ASSERT_FALSE(verifier.verify(graph));
+}
diff --git a/runtime/onert/core/src/util/ChromeTracingEventWriter.cc b/runtime/onert/core/src/util/ChromeTracingEventWriter.cc
index 3fc0c8ece..d868efedf 100644
--- a/runtime/onert/core/src/util/ChromeTracingEventWriter.cc
+++ b/runtime/onert/core/src/util/ChromeTracingEventWriter.cc
@@ -14,12 +14,12 @@
* limitations under the License.
*/
-#include "util/EventWriter.h"
+#include "EventWriter.h"
-#include <sstream>
-#include <vector>
#include <cassert>
+#include <sstream>
#include <utility>
+#include <vector>
// json type for ChromeTracingWriter
namespace
diff --git a/runtime/onert/core/src/util/ConfigSource.cc b/runtime/onert/core/src/util/ConfigSource.cc
index 9da93f68a..b7fcefc7a 100644
--- a/runtime/onert/core/src/util/ConfigSource.cc
+++ b/runtime/onert/core/src/util/ConfigSource.cc
@@ -15,13 +15,15 @@
*/
#include "util/ConfigSource.h"
-#include "util/GeneralConfigSource.h"
-#include "util/EnvConfigSource.h"
+#include "util/logging.h"
+
+#include <misc/EnvConfigSource.h>
+#include <misc/GeneralConfigSource.h>
+#include <misc/IConfigSource.h>
-#include <array>
#include <algorithm>
+#include <array>
#include <cassert>
-
#include <memory>
namespace onert
@@ -29,12 +31,27 @@ namespace onert
namespace util
{
+using namespace nnfw::misc;
+
static std::unique_ptr<IConfigSource> _source;
static std::unique_ptr<IConfigSource> _source_ext;
void config_source(std::unique_ptr<IConfigSource> &&source) { _source = std::move(source); }
void config_source_ext(std::unique_ptr<IConfigSource> &&source) { _source_ext = std::move(source); }
+void setConfigKeyValues(const CfgKeyValues &keyValues)
+{
+ auto configsrc = std::make_unique<GeneralConfigSource>();
+
+ for (auto it = keyValues.begin(); it != keyValues.end(); ++it)
+ {
+ VERBOSE(NNPKG_CONFIGS) << "(" << it->first << ") = (" << it->second << ")" << std::endl;
+ configsrc->set(it->first, it->second);
+ }
+
+ onert::util::config_source_ext(std::move(configsrc));
+}
+
static IConfigSource *config_source()
{
if (!_source)
diff --git a/runtime/onert/core/src/util/EnvConfigSource.cc b/runtime/onert/core/src/util/EnvConfigSource.cc
deleted file mode 100644
index 0d25b7353..000000000
--- a/runtime/onert/core/src/util/EnvConfigSource.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/EnvConfigSource.h"
-
-#include <cstdlib>
-
-namespace onert
-{
-namespace util
-{
-
-std::string EnvConfigSource::get(const std::string &key) const
-{
- const char *value = std::getenv(key.c_str());
- if (value != nullptr)
- {
- return value;
- }
- else
- {
- return GeneralConfigSource::get(key);
- }
-}
-
-} // namespace util
-} // namespace onert
diff --git a/runtime/onert/core/src/util/EventCollector.cc b/runtime/onert/core/src/util/EventCollector.cc
index 83c2649d1..c1b9c4315 100644
--- a/runtime/onert/core/src/util/EventCollector.cc
+++ b/runtime/onert/core/src/util/EventCollector.cc
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "util/EventCollector.h"
+#include "EventCollector.h"
// C++ standard libraries
#include <chrono>
diff --git a/runtime/onert/core/src/util/EventCollector.h b/runtime/onert/core/src/util/EventCollector.h
index 774fe05ef..effb72373 100644
--- a/runtime/onert/core/src/util/EventCollector.h
+++ b/runtime/onert/core/src/util/EventCollector.h
@@ -17,12 +17,13 @@
#ifndef __ONERT_UTIL_EVENT_COLLECTOR_H__
#define __ONERT_UTIL_EVENT_COLLECTOR_H__
-#include "util/EventRecorder.h"
+#include "EventRecorder.h"
+
#include "util/TracingCtx.h"
-#include <vector>
-#include <utility>
#include <string>
+#include <utility>
+#include <vector>
class EventCollector
{
diff --git a/runtime/onert/core/src/util/EventRecorder.cc b/runtime/onert/core/src/util/EventRecorder.cc
index 5d3d5f5c6..85a588d38 100644
--- a/runtime/onert/core/src/util/EventRecorder.cc
+++ b/runtime/onert/core/src/util/EventRecorder.cc
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "util/EventRecorder.h"
+#include "EventRecorder.h"
void EventRecorder::emit(std::unique_ptr<DurationEvent> &&evt)
{
diff --git a/runtime/onert/core/src/util/EventWriter.cc b/runtime/onert/core/src/util/EventWriter.cc
index c42c53730..ca4bd302e 100644
--- a/runtime/onert/core/src/util/EventWriter.cc
+++ b/runtime/onert/core/src/util/EventWriter.cc
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "util/EventWriter.h"
+#include "EventWriter.h"
#include <cassert>
diff --git a/runtime/onert/core/src/util/GeneralConfigSource.cc b/runtime/onert/core/src/util/GeneralConfigSource.cc
deleted file mode 100644
index 7d2757e58..000000000
--- a/runtime/onert/core/src/util/GeneralConfigSource.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/GeneralConfigSource.h"
-#include "util/logging.h"
-
-namespace onert
-{
-namespace util
-{
-
-std::string GeneralConfigSource::get(const std::string &key) const
-{
- auto itr = _map.find(key);
- if (itr == _map.end())
- {
- return "";
- }
- else
- {
- return itr->second;
- }
-}
-
-void GeneralConfigSource::set(const std::string &key, const std::string &val)
-{
- VERBOSE(GeneralConfigSource) << key << " : " << val << std::endl;
- _map[key] = val;
-}
-
-} // namespace util
-} // namespace onert
diff --git a/runtime/onert/core/src/util/Index.test.cc b/runtime/onert/core/src/util/Index.test.cc
new file mode 100644
index 000000000..ff73e5e59
--- /dev/null
+++ b/runtime/onert/core/src/util/Index.test.cc
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/Index.h"
+
+#include <gtest/gtest.h>
+
+using Index = ::onert::util::Index<uint32_t, struct TestTag>;
+
+TEST(Index, neg_index_test)
+{
+ Index idx1{1u};
+ Index idx2{2u};
+ Index idx3{idx1};
+
+ ASSERT_EQ(idx1, 1);
+ ASSERT_EQ(idx1, 1u);
+ ASSERT_EQ(idx1.value(), 1u);
+ ASSERT_NE(idx1, idx2);
+ ASSERT_EQ(idx1, idx3);
+}
diff --git a/runtime/onert/core/src/util/MDTableEventWriter.cc b/runtime/onert/core/src/util/MDTableEventWriter.cc
index b7fbac5e2..7a8b9f234 100644
--- a/runtime/onert/core/src/util/MDTableEventWriter.cc
+++ b/runtime/onert/core/src/util/MDTableEventWriter.cc
@@ -14,16 +14,16 @@
* limitations under the License.
*/
-#include "util/EventWriter.h"
+#include "EventWriter.h"
-#include <sstream>
-#include <vector>
-#include <unordered_map>
#include <cassert>
-#include <utility>
#include <map>
#include <set>
+#include <sstream>
#include <stdint.h>
+#include <unordered_map>
+#include <utility>
+#include <vector>
// md table type
namespace
diff --git a/runtime/onert/core/src/util/ObjectManager.test.cc b/runtime/onert/core/src/util/ObjectManager.test.cc
new file mode 100644
index 000000000..3fe735732
--- /dev/null
+++ b/runtime/onert/core/src/util/ObjectManager.test.cc
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/Index.h"
+#include "util/ObjectManager.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert;
+
+struct TestTag;
+using Index = typename util::Index<uint32_t, TestTag>;
+
+TEST(ObjectManager, emplace)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index = man.emplace(100);
+ ASSERT_EQ(man.at(index), 100);
+}
+
+TEST(ObjectManager, neg_remove_1)
+{
+ util::ObjectManager<Index, int> man;
+
+ Index index = man.emplace(100);
+ ASSERT_TRUE(man.exist(index));
+ ASSERT_EQ(man.at(index), 100);
+
+ man.remove(index);
+ ASSERT_FALSE(man.exist(index));
+}
+
+TEST(ObjectManager, neg_remove_2)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index0 = man.emplace(100);
+ auto index1 = man.emplace(200);
+ ASSERT_TRUE(man.exist(index0));
+ ASSERT_EQ(man.at(index0), 100);
+ ASSERT_TRUE(man.exist(index1));
+ ASSERT_EQ(man.at(index1), 200);
+
+ man.remove(index0);
+ ASSERT_FALSE(man.exist(index0));
+ ASSERT_TRUE(man.exist(index1));
+ ASSERT_EQ(man.at(index1), 200);
+}
+
+TEST(ObjectManager, push)
+{
+ util::ObjectManager<Index, int> man;
+
+ // Not specify index
+ auto index = man.push(std::make_unique<int>(100));
+ ASSERT_EQ(man.at(index), 100);
+
+ // Specify index
+ auto index2 = man.push(std::make_unique<int>(200), Index{33});
+ ASSERT_EQ(index2.value(), 33);
+ ASSERT_EQ(man.at(index2), 200);
+
+ auto index3 = man.push(std::make_unique<int>(300));
+ // NOTE auto-generated index number is always (biggest index in the ObjectManager + 1)
+ ASSERT_EQ(index3.value(), 34);
+ ASSERT_EQ(man.at(index3), 300);
+
+ auto index4 = man.push(std::make_unique<int>(400), Index{22});
+ ASSERT_EQ(index4.value(), 22);
+ ASSERT_EQ(man.at(index4), 400);
+
+ auto index5 = man.push(std::make_unique<int>(500));
+ // NOTE auto-generated index number is always (biggest index in the ObjectManager + 1)
+ ASSERT_EQ(index5.value(), 35);
+ ASSERT_EQ(man.at(index5), 500);
+}
+
+TEST(ObjectManager, neg_push)
+{
+ util::ObjectManager<Index, int> man;
+
+ // Specify index
+ auto index = man.push(std::make_unique<int>(100), Index{55});
+ ASSERT_EQ(index.value(), 55);
+ ASSERT_EQ(man.at(index), 100);
+
+ // Specify the same index
+ auto index2 = man.push(std::make_unique<int>(200), Index{55});
+ ASSERT_FALSE(index2.valid());
+}
+
+static const uint32_t kMaxUInt32 = std::numeric_limits<uint32_t>::max();
+
+TEST(ObjectManager, neg_push_undefined_index)
+{
+ util::ObjectManager<Index, int> man;
+
+ // Try inserting invalid(undefined) index
+ auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32});
+ ASSERT_FALSE(index.valid());
+ ASSERT_EQ(man.size(), 0);
+}
+
+TEST(ObjectManager, neg_push_max_index)
+{
+ util::ObjectManager<Index, int> man;
+
+ // Insert an object with maximum valid index
+ auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32 - 1});
+ ASSERT_EQ(index.value(), kMaxUInt32 - 1);
+ ASSERT_EQ(man.at(index), 100);
+ ASSERT_EQ(man.size(), 1);
+
+ // Reached to the final index so next push/emplace must fail
+ auto index2 = man.push(std::make_unique<int>(200));
+ ASSERT_EQ(man.size(), 1);
+ ASSERT_FALSE(index2.valid());
+}
+
+TEST(ObjectManager, neg_emplace_max_index)
+{
+ util::ObjectManager<Index, int> man;
+
+ // Insert an object with maximum valid index
+ auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32 - 1});
+ ASSERT_EQ(index.value(), kMaxUInt32 - 1);
+ ASSERT_EQ(man.at(index), 100);
+ ASSERT_EQ(man.size(), 1);
+
+ // Reached to the final index so next push/emplace must fail
+ auto index3 = man.emplace(200);
+ ASSERT_EQ(man.size(), 1);
+ ASSERT_FALSE(index3.valid());
+}
+
+TEST(ObjectManager, const_iterate)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index0 = man.emplace(100);
+ auto index1 = man.emplace(200);
+ auto index2 = man.emplace(300);
+
+ int sum = 0;
+ man.iterate([&](const Index &index, const int &val) { sum += val; });
+ ASSERT_EQ(sum, 600);
+}
+
+TEST(ObjectManager, non_const_iterate)
+{
+ util::ObjectManager<Index, int> man;
+
+ auto index0 = man.emplace(100);
+ auto index1 = man.emplace(200);
+ auto index2 = man.emplace(300);
+
+ man.iterate([&](const Index &index, int &val) { val += 1; });
+ ASSERT_EQ(man.at(index0), 101);
+ ASSERT_EQ(man.at(index1), 201);
+ ASSERT_EQ(man.at(index2), 301);
+}
+
+TEST(ObjectManager, set)
+{
+ util::ObjectManager<Index, int> man;
+ auto index = man.set(Index{1}, std::make_unique<int>(100)); // Insert
+ ASSERT_EQ(index, Index{1});
+ auto index2 = man.set(index, std::make_unique<int>(200)); // Overwrite
+ ASSERT_EQ(index2, index);
+ ASSERT_EQ(man.at(index2), 200);
+}
+
+TEST(ObjectManager, neg_set)
+{
+ auto v = std::make_unique<int>(100);
+ util::ObjectManager<Index, int> man;
+ auto index = man.set(Index{}, std::move(v)); // Try set with an invalid index
+ ASSERT_EQ(index, Index{});
+ ASSERT_FALSE(index.valid());
+ ASSERT_NE(v, nullptr); // v must be kept when failure
+}
+
+TEST(ObjectManager, getRawPtr)
+{
+ auto v = std::make_unique<int>(100);
+ auto v_ptr = v.get();
+ util::ObjectManager<Index, int> man;
+ auto index = man.push(std::move(v));
+ ASSERT_EQ(v_ptr, man.getRawPtr(index));
+}
+
+TEST(ObjectManager, neg_getRawPtr)
+{
+ util::ObjectManager<Index, int> man;
+ auto ptr = man.getRawPtr(Index{1});
+ ASSERT_EQ(ptr, nullptr);
+}
diff --git a/runtime/onert/core/src/util/SNPEEventWriter.cc b/runtime/onert/core/src/util/SNPEEventWriter.cc
index 6f03cfccf..4dea6d16c 100644
--- a/runtime/onert/core/src/util/SNPEEventWriter.cc
+++ b/runtime/onert/core/src/util/SNPEEventWriter.cc
@@ -14,11 +14,12 @@
* limitations under the License.
*/
-#include "util/EventWriter.h"
+#include "EventWriter.h"
-#include <unordered_map>
#include <json/json.h>
+
#include <cassert>
+#include <unordered_map>
#include <utility>
/**
diff --git a/runtime/onert/core/src/util/ShapeInference.test.cc b/runtime/onert/core/src/util/ShapeInference.test.cc
new file mode 100644
index 000000000..96579bfa2
--- /dev/null
+++ b/runtime/onert/core/src/util/ShapeInference.test.cc
@@ -0,0 +1,544 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/ShapeInference.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::ir;
+
+TEST(ShapeInference, Elementwise)
+{
+ Shape lhs_shape{1, 299, 299, 3};
+ Shape rhs_shape{3};
+ auto infered_out_shape = onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.dim(0), 1);
+ ASSERT_EQ(infered_out_shape.dim(1), 299);
+ ASSERT_EQ(infered_out_shape.dim(2), 299);
+ ASSERT_EQ(infered_out_shape.dim(3), 3);
+}
+
+TEST(ShapeInference, neg_Elementwise)
+{
+ Shape lhs_shape{1, 299, 299, 3};
+ Shape rhs_shape{5, 3};
+ ASSERT_THROW(onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape), std::runtime_error);
+}
+
+TEST(ShapeInference, Pool2DNodeSame)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Stride stride{3, 7};
+ Padding padding{PaddingType::SAME};
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, Pool2DNodeValid)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Stride stride{3, 7};
+ Padding padding{PaddingType::VALID};
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, Pool2DNodeExplicit)
+{
+ Shape in_shape{10, 3, 5, 20};
+
+ Stride stride{3, 7};
+ Padding padding{4, 3, 2, 1};
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+
+ operation::Pool2D::Param max_pool_param{
+ operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+ infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
+}
+
+TEST(ShapeInference, neg_Pool2DNode_InvalidStride)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Stride stride{0, 7};
+ Padding padding{PaddingType::SAME};
+
+ operation::Pool2D::Param avg_pool_param{
+ operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+ ASSERT_THROW(onert::shape_inference::inferPoolShape(in_shape, avg_pool_param),
+ std::runtime_error);
+}
+
+TEST(ShapeInference, Conv2D)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{30, 3, 6, 20};
+
+ operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE,
+ Dilation{1, 1}};
+ auto infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
+
+ param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE,
+ Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
+
+ param =
+ operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
+}
+
+TEST(ShapeInference, neg_Conv2D_InvalidStride)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{30, 3, 6, 20};
+
+ operation::Conv2D::Param param{Stride{0, 0}, Padding{PaddingType::VALID}, Activation::NONE,
+ Dilation{1, 1}};
+ ASSERT_THROW(onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param),
+ std::runtime_error);
+}
+
+TEST(ShapeInference, DepthwiseConv2D)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{1, 3, 6, 60};
+
+ operation::DepthwiseConv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, 3,
+ Activation::NONE, Dilation{1, 1}};
+ auto infered_out_shape =
+ onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
+
+ param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, 3,
+ Activation::NONE, Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
+
+ param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, 3, Activation::NONE,
+ Dilation{1, 1}};
+ infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 4);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
+ ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
+}
+
+TEST(ShapeInference, neg_DepthwiseConv2D_InvalidSride)
+{
+ Shape in_shape{10, 6, 12, 20};
+ Shape ker_shape{1, 3, 6, 60};
+
+ operation::DepthwiseConv2D::Param param{Stride{3, 0}, Padding{PaddingType::VALID}, 3,
+ Activation::NONE, Dilation{1, 1}};
+ ASSERT_THROW(onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param),
+ std::runtime_error);
+}
+
+TEST(ShapeInference, Concat)
+{
+ {
+ Shape in1{10, 20, 30, 3, 50};
+ Shape in2{10, 20, 30, 2, 50};
+ Shape in3{10, 20, 30, 2, 50};
+
+ operation::Concat::Param param{3};
+ auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2, in3}, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 5);
+ ASSERT_EQ(infered_out_shape.dim(0), 10);
+ ASSERT_EQ(infered_out_shape.dim(1), 20);
+ ASSERT_EQ(infered_out_shape.dim(2), 30);
+ ASSERT_EQ(infered_out_shape.dim(3), 7);
+ ASSERT_EQ(infered_out_shape.dim(4), 50);
+ }
+ {
+ // case 1. when axis < 0
+ Shape in1{10, 20, 2};
+ Shape in2{10, 20, 3};
+
+ operation::Concat::Param param{-1};
+ auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 3);
+ ASSERT_EQ(infered_out_shape.dim(0), 10);
+ ASSERT_EQ(infered_out_shape.dim(1), 20);
+ ASSERT_EQ(infered_out_shape.dim(2), 5);
+ }
+ {
+ // case 2. when axis < 0
+ Shape in1{2, 20, 2};
+ Shape in2{3, 20, 2};
+
+ operation::Concat::Param param{-3};
+ auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param);
+
+ ASSERT_EQ(infered_out_shape.rank(), 3);
+ ASSERT_EQ(infered_out_shape.dim(0), 5);
+ ASSERT_EQ(infered_out_shape.dim(1), 20);
+ ASSERT_EQ(infered_out_shape.dim(2), 2);
+ }
+}
+
+TEST(ShapeInference, neg_Concat)
+{
+ {
+ operation::Concat::Param param{2};
+ Shape in1{10, 1, 3};
+ Shape in2{10, 2, 4}; // dim[1] should be 1 but 2
+
+ EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param));
+ }
+ { // wrong rank
+ operation::Concat::Param param{2};
+ Shape in1{10, 2, 3, 4};
+ Shape in2{10, 2, 4}; // rank should be 4
+
+ EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param));
+ }
+}
+
+TEST(ShapeInference, ExpandDims)
+{
+ Shape in_shape{30, 40};
+
+ auto check = [&](int32_t axis, Shape &expected) {
+ auto actual = onert::shape_inference::inferExpandDimsShape(in_shape, axis);
+
+ ASSERT_EQ(actual.rank(), 3);
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
+ { // boundary
+ int32_t axis = 0;
+ Shape expected{1, 30, 40};
+ check(axis, expected);
+ }
+ { // boundary
+ int32_t axis = 2;
+ Shape expected{30, 40, 1};
+ check(axis, expected);
+ }
+ { // inside
+ int32_t axis = 1;
+ Shape expected{30, 1, 40};
+ check(axis, expected);
+ }
+ { // negative boundary
+ int32_t axis = -1;
+ Shape expected{30, 40, 1};
+ check(axis, expected);
+ }
+ { // negative boundary
+ int32_t axis = -3;
+ Shape expected{1, 30, 40};
+ check(axis, expected);
+ }
+}
+
+TEST(ShapeInference, neg_ExpandDims)
+{
+ Shape in_shape{30, 40};
+
+ { // over boundary
+ int32_t axis = 3;
+ ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error);
+ }
+ { // over boundary
+ int32_t axis = -4;
+ ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error);
+ }
+}
+
+TEST(ShapeInference, FullyConnected)
+{
+ Shape in_shape{3, 4, 5, 6};
+ Shape ker_shape{3, 10};
+ auto infered_out_shape = onert::shape_inference::inferFullyConnectedShape(in_shape, ker_shape);
+
+ ASSERT_EQ(infered_out_shape.rank(), 2);
+ ASSERT_EQ(infered_out_shape.dim(0), 36);
+ ASSERT_EQ(infered_out_shape.dim(1), 3);
+}
+
+TEST(ShapeInference, Transpose)
+{
+ auto check = [&](Shape &in_shape, std::vector<int> perm, Shape &expected) {
+ // pre-conditions
+ ASSERT_EQ(in_shape.rank(), perm.size());
+ ASSERT_EQ(expected.rank(), perm.size());
+ auto inferred_out_shape =
+ onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size());
+ // post-conditions
+ ASSERT_EQ(inferred_out_shape.rank(), perm.size());
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ {
+ ASSERT_EQ(inferred_out_shape.dim(dim), expected.dim(dim));
+ }
+ };
+ // check for 2-D
+ {
+ Shape in_shape{2, 3};
+ std::vector<int> perm = {1, 0};
+ Shape expected{3, 2};
+ // int32_t rank = 2;
+ check(in_shape, perm, expected);
+ }
+ // check for 3-D
+ {
+ Shape in_shape{1, 2, 3};
+ std::vector<int> perm = {2, 0, 1};
+ Shape expected{3, 1, 2};
+ // int32_t rank = 3;
+ check(in_shape, perm, expected);
+ }
+ // check for 4-D
+ {
+ Shape in_shape{1, 2, 3, 4};
+ std::vector<int> perm = {1, 3, 0, 2};
+ Shape expected{2, 4, 1, 3};
+ // int32_t rank = 4;
+ check(in_shape, perm, expected);
+ }
+}
+
+TEST(ShapeInference, neg_Transpose)
+{
+ Shape in_shape{1, 2, 3};
+ // Invalid parameter size
+ {
+ std::vector<int> perm = {2, 0, 1, 0};
+ // int32_t rank = 3;
+ ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()),
+ std::runtime_error);
+ }
+ // Invalid parameter value
+ {
+ std::vector<int> perm = {2, 0, 3};
+ // int32_t rank = 3;
+ ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()),
+ std::runtime_error);
+ }
+}
+
+TEST(ShapeInference, Gather)
+{
+ auto check = [&](Shape &input, Shape &indices, Shape &expected, int32_t axis) {
+ int rank = input.rank();
+ auto actual = onert::shape_inference::inferGatherShape(input, indices, axis, rank);
+
+ ASSERT_EQ(actual.rank(), expected.rank());
+
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
+ // check for 2-D, 3-D, axis 0
+ {
+ Shape input{3, 4};
+ Shape indices{1, 1, 2};
+ int32_t axis = 0;
+ Shape expected{1, 1, 2, 4};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 2-D, 3-D, axis 1
+ {
+ Shape input{3, 4};
+ Shape indices{1, 2, 1};
+ int32_t axis = 1;
+ Shape expected{3, 1, 2, 1};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 3-D, 2-D, axis 0
+ {
+ Shape input{2, 3, 4};
+ Shape indices{1, 2};
+ int32_t axis = 0;
+ Shape expected{1, 2, 3, 4};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 3-D, 2-D, axis 2
+ {
+ Shape input{2, 3, 4};
+ Shape indices{2, 1};
+ int32_t axis = 2;
+ Shape expected{2, 3, 2, 1};
+ check(input, indices, expected, axis);
+ }
+
+ // check for 4D, axis 0
+ {
+ Shape input{1, 2, 3, 4};
+ Shape indices{2};
+ int32_t axis = 0;
+ Shape expected{2, 2, 3, 4};
+ check(input, indices, expected, axis);
+ }
+}
+
+TEST(ShapeInference, BCQFullyConnected)
+{
+ auto check = [&](Shape &in_shape, Shape &cluster_shape, std::vector<int> cluster,
+ Shape &expected) {
+ auto actual =
+ onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape, cluster.data());
+ ASSERT_EQ(actual.rank(), expected.rank());
+
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
+ {
+ Shape in_shape{10, 1};
+ Shape cluster_shape{3, 2};
+ std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
+
+ Shape expected{30, 1};
+ check(in_shape, cluster_shape, cluster, expected);
+ }
+
+ {
+ Shape in_shape{1, 1};
+ Shape cluster_shape{1, 2};
+ std::vector<int> cluster = {3, 50};
+
+ Shape expected{50, 1};
+ check(in_shape, cluster_shape, cluster, expected);
+ }
+}
+
+TEST(ShapeInference, BCQGather)
+{
+ auto check = [&](Shape &indices_shape, Shape &cluster_shape, std::vector<int> cluster,
+ uint32_t hidden_size, uint32_t axis, int rank, Shape &expected) {
+ operation::BCQGather::Param param{hidden_size, axis};
+ auto actual = onert::shape_inference::inferBCQGatherShape(indices_shape, cluster_shape,
+ cluster.data(), rank, param);
+ ASSERT_EQ(actual.rank(), expected.rank());
+
+ for (int32_t dim = 0; dim < expected.rank(); dim++)
+ ASSERT_EQ(actual.dim(dim), expected.dim(dim));
+ };
+
+ {
+ Shape indices_shape{5, 1};
+ Shape cluster_shape{3, 2};
+ std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
+ uint32_t hidden_size = 10;
+ uint32_t axis = 0;
+ int rank = 2;
+
+ Shape expected{5, 1, 10};
+ check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected);
+ }
+
+ {
+ Shape indices_shape{5, 1};
+ Shape cluster_shape{3, 2};
+ std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
+ uint32_t hidden_size = 10;
+ uint32_t axis = 1;
+ int rank = 2;
+
+ Shape expected{30, 5, 1};
+ check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected);
+ }
+}
diff --git a/runtime/onert/frontend/base_loader/include/base_loader.h b/runtime/onert/frontend/base_loader/include/base_loader.h
index 5649f286d..cf080abbc 100644
--- a/runtime/onert/frontend/base_loader/include/base_loader.h
+++ b/runtime/onert/frontend/base_loader/include/base_loader.h
@@ -65,10 +65,10 @@ public:
/**
* @brief Construct a new Loader object
*
- * @param graph reference on subgraphs
+ * @param model reference to model
*/
- explicit BaseLoader(std::unique_ptr<ir::Subgraphs> &subgs)
- : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _subgraphs(subgs), _model{nullptr},
+ explicit BaseLoader(std::unique_ptr<ir::Model> &model)
+ : _base{nullptr}, _pagesize(getpagesize()), _fd(-1), _model(model), _domain_model{nullptr},
_tensor_names(std::make_shared<std::unordered_map<ir::OperandIndex, std::string>>())
{
_use_mmaped_data = util::getConfigBool(util::config::USE_MMAPED_DATA);
@@ -114,7 +114,7 @@ protected:
// Get BuiltinOperator
BuiltinOperator getBuiltinOperator(const Operator *op)
{
- auto const builtin_opcode = _model->operator_codes()->Get(op->opcode_index());
+ auto const builtin_opcode = _domain_model->operator_codes()->Get(op->opcode_index());
auto builtin_op = builtin_opcode->builtin_code();
if (builtin_op < BuiltinOperator::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES)
builtin_op = static_cast<BuiltinOperator>(builtin_opcode->deprecated_builtin_code());
@@ -176,7 +176,7 @@ private:
void verifySubgraphIndex(int subg_index)
{
- const auto num_subgraphs = _model->subgraphs()->size();
+ const auto num_subgraphs = _domain_model->subgraphs()->size();
if (subg_index < 0 || subg_index >= static_cast<int32_t>(num_subgraphs))
throw std::runtime_error{std::string{"Invalid subgraph index - "} +
std::to_string(subg_index)};
@@ -189,9 +189,9 @@ protected:
int32_t _pagesize;
// loaded file description
int _fd;
- // Reference on loadable subgraphs
- std::unique_ptr<ir::Subgraphs> &_subgraphs;
- const Model *_model;
+ // Reference to ir::model (to be loaded from _domain_model)
+ std::unique_ptr<ir::Model> &_model;
+ const Model *_domain_model;
// Maps Tensor indices to onert Operands.
std::vector<ir::OperandIndex> _tensor_to_operand;
std::shared_ptr<std::unordered_map<ir::OperandIndex, std::string>> _tensor_names;
@@ -290,6 +290,8 @@ ir::DataType BaseLoader<LoaderDomain>::BaseLoader::tensorTypeToDataType(const Te
case TensorType::TensorType_INT8:
return ir::DataType::QUANT_INT8_ASYMM;
// case TensorType::TensorType_FLOAT64
+ case TensorType::TensorType_UINT32:
+ return ir::DataType::UINT32;
default:
throw std::runtime_error(
std::string("Unsupported tensor type: ").append(EnumNameTensorType(type)));
@@ -358,7 +360,7 @@ ir::OperandIndex BaseLoader<LoaderDomain>::loadOperand(const Tensor *tensor, ir:
const auto operand_index = subg.addOperand(shape, type_info);
// Constant tensors are indicated by non-empty data.
- const auto *data = _model->buffers()->Get(tensor->buffer())->data();
+ const auto *data = _domain_model->buffers()->Get(tensor->buffer())->data();
if (data != nullptr)
{
using std::ptrdiff_t;
@@ -1037,7 +1039,7 @@ void BaseLoader<LoaderDomain>::loadCustom(const Operator *op, ir::Graph &subg)
assert(op->custom_options_format() == CustomOptionsFormat::CustomOptionsFormat_FLEXBUFFERS &&
"Unsupported custom operation options format");
- auto *op_code = _model->operator_codes()->Get(op->opcode_index());
+ auto *op_code = _domain_model->operator_codes()->Get(op->opcode_index());
auto custom_op_name = op_code->custom_code()->str();
enum class BuiltinOP
@@ -1670,7 +1672,7 @@ void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg
template <typename LoaderDomain> void BaseLoader<LoaderDomain>::loadModel()
{
LoaderDomain::VerifyModelBuffer(*_verifier.get());
- _model = LoaderDomain::GetModel(_base);
+ _domain_model = LoaderDomain::GetModel(_base);
// Version unused
// const auto version = _model->version();
// Description unused
@@ -1678,14 +1680,14 @@ template <typename LoaderDomain> void BaseLoader<LoaderDomain>::loadModel()
// Metabuffer unsued
// const auto *metadata_buffer = _model->metadata_buffer();
// Load subgraphs and map operations on subgraph
- const auto domain_subgraphs = _model->subgraphs();
- auto subgraphs = std::make_unique<ir::Subgraphs>();
- for (uint32_t subgraph_index = 0; subgraph_index < domain_subgraphs->size(); ++subgraph_index)
+ const auto subgraphs = _domain_model->subgraphs();
+ auto model = std::make_unique<ir::Model>();
+ for (uint32_t subgraph_index = 0; subgraph_index < subgraphs->size(); ++subgraph_index)
{
- auto subg = loadSubgraph((*_model->subgraphs())[subgraph_index]);
- subgraphs->push(ir::SubgraphIndex{subgraph_index}, std::move(subg));
+ auto subg = loadSubgraph((*_domain_model->subgraphs())[subgraph_index]);
+ model->push(ir::SubgraphIndex{subgraph_index}, std::move(subg));
}
- _subgraphs = std::move(subgraphs);
+ _model = std::move(model);
}
} // namespace base_loader
diff --git a/runtime/onert/frontend/circle/include/circle_loader.h b/runtime/onert/frontend/circle/include/circle_loader.h
index 44bf28056..87e5d70ae 100644
--- a/runtime/onert/frontend/circle/include/circle_loader.h
+++ b/runtime/onert/frontend/circle/include/circle_loader.h
@@ -25,8 +25,8 @@ namespace onert
{
namespace circle_loader
{
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename);
-std::unique_ptr<ir::Subgraphs> loadModel(uint8_t *buffer, size_t size);
+std::unique_ptr<ir::Model> loadModel(const std::string &filename);
+std::unique_ptr<ir::Model> loadModel(uint8_t *buffer, size_t size);
} // namespace circle_loader
} // namespace onert
diff --git a/runtime/onert/frontend/circle/src/circle_loader.cc b/runtime/onert/frontend/circle/src/circle_loader.cc
index aae831d61..5abcc9cd0 100644
--- a/runtime/onert/frontend/circle/src/circle_loader.cc
+++ b/runtime/onert/frontend/circle/src/circle_loader.cc
@@ -228,20 +228,20 @@ void CircleLoader::loadBCQFullyConnected(const Operator *op, ir::Graph &subg)
} // namespace
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename)
+std::unique_ptr<ir::Model> loadModel(const std::string &filename)
{
- auto subgraphs = std::make_unique<ir::Subgraphs>();
- CircleLoader loader(subgraphs);
+ auto model = std::make_unique<ir::Model>();
+ CircleLoader loader(model);
loader.loadFromFile(filename);
- return subgraphs;
+ return model;
}
-std::unique_ptr<ir::Subgraphs> loadModel(uint8_t *buffer, size_t size)
+std::unique_ptr<ir::Model> loadModel(uint8_t *buffer, size_t size)
{
- auto subgraphs = std::make_unique<ir::Subgraphs>();
- CircleLoader loader(subgraphs);
+ auto model = std::make_unique<ir::Model>();
+ CircleLoader loader(model);
loader.loadFromBuffer(buffer, size);
- return subgraphs;
+ return model;
}
} // namespace circle_loader
diff --git a/runtime/onert/frontend/nnapi/execution.cc b/runtime/onert/frontend/nnapi/execution.cc
index 56ca5ef00..19636a84d 100644
--- a/runtime/onert/frontend/nnapi/execution.cc
+++ b/runtime/onert/frontend/nnapi/execution.cc
@@ -37,7 +37,7 @@ int ANeuralNetworksExecution_create(ANeuralNetworksCompilation *compilation,
return ANEURALNETWORKS_UNEXPECTED_NULL;
}
- std::shared_ptr<onert::exec::ExecutorMap> executors;
+ std::shared_ptr<onert::exec::Executors> executors;
compilation->publish(executors);
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc
index 63036a398..bb247b97f 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.cc
@@ -18,11 +18,12 @@
#include "util/logging.h"
+using namespace onert;
+
// TODO Support multiple subgraphs
ANeuralNetworksCompilation::ANeuralNetworksCompilation(const ANeuralNetworksModel *model) noexcept
- : _subgraphs{model->getSubGraphs()}, _tracing_ctx{std::make_unique<onert::util::TracingCtx>(
- _subgraphs.get())},
- _compiler{new onert::compiler::Compiler{_subgraphs, _tracing_ctx.get()}}
+ : _model{model->getModel()}, _coptions{compiler::CompilerOptions::fromGlobalConfig()},
+ _compiler{std::make_shared<compiler::Compiler>(_model, *_coptions)}
{
if (model->allowedToFp16())
{
@@ -34,7 +35,7 @@ bool ANeuralNetworksCompilation::finish() noexcept
{
try
{
- _executors = _compiler->compile();
+ _artifact = _compiler->compile();
}
catch (const std::exception &e)
{
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h
index bd61f9d86..dff5c6dc6 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksCompilation.h
@@ -21,8 +21,8 @@
#include "compiler/Compiler.h"
#include "ir/Graph.h"
-#include "ir/Subgraphs.h"
-#include "exec/IExecutor.h"
+#include "ir/Model.h"
+#include "exec/Executors.h"
#include "util/TracingCtx.h"
struct ANeuralNetworksCompilation
@@ -34,23 +34,16 @@ public:
bool finish() noexcept;
onert::compiler::State state(void) noexcept { return _compiler->state(); }
- void publish(std::shared_ptr<onert::exec::ExecutorMap> &executors) noexcept
+ void publish(std::shared_ptr<onert::exec::Executors> &executors) noexcept
{
- executors = _executors;
+ executors = _artifact ? _artifact->_executors : nullptr;
}
private:
- std::shared_ptr<onert::ir::Subgraphs> _subgraphs;
- // TODO Refine the ownership of TracingCtx
- // In case of nnfw API, nnfw_session has ownership of TracingCtx.
- // In case of nnapi, there is no concept of session and primary model might have the ownership
- // of TracingCtx.
- // Since we don't support multiple models yet with nnapi in ONE, let's implement this later
- // and let's make it work with one model for now.
- std::unique_ptr<onert::util::TracingCtx> _tracing_ctx;
-
+ std::shared_ptr<onert::ir::Model> _model;
+ std::unique_ptr<onert::compiler::CompilerOptions> _coptions;
std::shared_ptr<onert::compiler::Compiler> _compiler;
- std::shared_ptr<onert::exec::ExecutorMap> _executors;
+ std::shared_ptr<onert::compiler::CompilerArtifact> _artifact;
};
#endif
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
index 70c5d2a4b..110c7cd55 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksExecution.h
@@ -26,7 +26,7 @@
struct ANeuralNetworksExecution
{
public:
- ANeuralNetworksExecution(const std::shared_ptr<onert::exec::ExecutorMap> &executors)
+ ANeuralNetworksExecution(const std::shared_ptr<onert::exec::Executors> &executors)
: _execution{std::make_shared<onert::exec::Execution>(executors)}
{
// DO NOTHING
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc
index 81ffa26f3..a641368ec 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.cc
@@ -273,16 +273,16 @@ void ANeuralNetworksModel::fillOptionalOperand(void)
});
}
-std::shared_ptr<onert::ir::Subgraphs> ANeuralNetworksModel::getSubGraphs() const
+std::shared_ptr<onert::ir::Model> ANeuralNetworksModel::getModel() const
{
- auto all_subgs = std::make_shared<onert::ir::Subgraphs>();
+ auto model = std::make_shared<onert::ir::Model>();
- all_subgs->push(onert::ir::SubgraphIndex{0}, _graph);
+ model->push(onert::ir::SubgraphIndex{0}, _graph);
// TODO Find all child subgraphs and copy them to all_subgs
// Must find the same subgraph by using to compare pointer of subgraphs and set subgraph's index
// to operands of control flow operations
// Must clean all child subgraphs's pointer to prevent memory leak in case of that graph has
// subgraph itself recursively
- return all_subgs;
+ return model;
}
diff --git a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h
index 4301193d6..04f4cf0f2 100644
--- a/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h
+++ b/runtime/onert/frontend/nnapi/wrapper/ANeuralNetworksModel.h
@@ -22,7 +22,7 @@
#include <NeuralNetworksEx.h>
#include "ir/Graph.h"
-#include "ir/Subgraphs.h"
+#include "ir/Model.h"
struct ANeuralNetworksModel
{
@@ -59,7 +59,7 @@ public:
size_t operandSize(uint32_t index) noexcept;
bool isUsageSet(uint32_t index) noexcept;
bool isOperationOutput(uint32_t index) noexcept;
- std::shared_ptr<onert::ir::Subgraphs> getSubGraphs() const;
+ std::shared_ptr<onert::ir::Model> getModel() const;
private:
void setOptionalOperand(const onert::ir::OperandIndex idx);
diff --git a/runtime/onert/frontend/tflite/include/tflite_loader.h b/runtime/onert/frontend/tflite/include/tflite_loader.h
index dda34cc6a..cf17863f5 100644
--- a/runtime/onert/frontend/tflite/include/tflite_loader.h
+++ b/runtime/onert/frontend/tflite/include/tflite_loader.h
@@ -26,7 +26,7 @@ namespace onert
namespace tflite_loader
{
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename);
+std::unique_ptr<ir::Model> loadModel(const std::string &filename);
} // namespace tflite_loader
} // namespace onert
diff --git a/runtime/onert/frontend/tflite/src/tflite_loader.cc b/runtime/onert/frontend/tflite/src/tflite_loader.cc
index 3b160473d..fe69e4e2a 100644
--- a/runtime/onert/frontend/tflite/src/tflite_loader.cc
+++ b/runtime/onert/frontend/tflite/src/tflite_loader.cc
@@ -154,12 +154,12 @@ void TFLiteLoader::loadBatchMatMul(const Operator *op, ir::Graph &subg)
} // namespace
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename)
+std::unique_ptr<ir::Model> loadModel(const std::string &filename)
{
- auto subgraphs = std::make_unique<ir::Subgraphs>();
- TFLiteLoader loader(subgraphs);
+ auto model = std::make_unique<ir::Model>();
+ TFLiteLoader loader(model);
loader.loadFromFile(filename);
- return subgraphs;
+ return model;
}
} // namespace tflite_loader
diff --git a/runtime/onert/frontend/trix/CMakeLists.txt b/runtime/onert/frontend/trix/CMakeLists.txt
index 7a0df4eaa..8d9063f6c 100644
--- a/runtime/onert/frontend/trix/CMakeLists.txt
+++ b/runtime/onert/frontend/trix/CMakeLists.txt
@@ -2,7 +2,7 @@ if (NOT BUILD_TRIX_LOADER)
return()
endif ()
-nnfw_find_package(TRIXEngine EXACT 2.5.0 QUIET)
+nnfw_find_package(TRIXEngine QUIET 2.5.0)
if(TRIXEngine_FOUND)
list(APPEND SOURCES src/trix_loader.cc)
else()
diff --git a/runtime/onert/frontend/trix/include/trix_loader.h b/runtime/onert/frontend/trix/include/trix_loader.h
index 297d5ec28..26d6a3c56 100644
--- a/runtime/onert/frontend/trix/include/trix_loader.h
+++ b/runtime/onert/frontend/trix/include/trix_loader.h
@@ -27,7 +27,7 @@ namespace trix_loader
/**
* @throw runtime_error when tvn path is wrong or tvn is invalid
*/
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename);
+std::unique_ptr<ir::Model> loadModel(const std::string &filename);
} // namespace trix_loader
} // namespace onert
diff --git a/runtime/onert/frontend/trix/src/trix_loader.cc b/runtime/onert/frontend/trix/src/trix_loader.cc
index e2995bbd1..cdf239648 100644
--- a/runtime/onert/frontend/trix/src/trix_loader.cc
+++ b/runtime/onert/frontend/trix/src/trix_loader.cc
@@ -67,11 +67,11 @@ void TrixMetaReader::init(const char *path)
_meta = getNPUmodel_metadata(path, false);
if (_meta == nullptr)
{
- throw std::runtime_error("Failed to get TRIV2 model metadata");
+ throw std::runtime_error("Failed to get TRIX model metadata");
}
if (NPUBIN_VERSION(_meta->magiccode) != 3)
{
- throw std::runtime_error("TRIV2 model metadata version mismatched.");
+ throw std::runtime_error("TRIX model metadata version mismatched.");
}
}
@@ -81,9 +81,9 @@ public:
/**
* @brief Construct a new Loader object
*
- * @param graph reference on subgraphs
+ * @param model reference on model
*/
- explicit TrixLoader(std::unique_ptr<ir::Subgraphs> &subgs) : _subgraphs(subgs) {}
+ explicit TrixLoader(std::unique_ptr<ir::Model> &model) : _model(model) {}
/**
* @brief Load a model from file
@@ -97,7 +97,6 @@ private:
* @throw runtime_error when tvn path is wrong or tvn is invalid
*/
void loadModel();
- void loadSubgraphs();
std::unique_ptr<ir::Graph> loadSubgraph();
void loadOperands(ir::Graph &subg);
ir::OperandIndex loadOperandFromInput(uint32_t i, ir::Graph &subg);
@@ -112,8 +111,11 @@ private:
protected:
/** path to model (e.g. tvn) */
std::string _model_path;
+ /** original IO shapes */
+ std::vector<ir::Shape> _origin_input_shapes;
+ std::vector<ir::Shape> _origin_output_shapes;
/** Reference on loadable subgraphs */
- std::unique_ptr<ir::Subgraphs> &_subgraphs;
+ std::unique_ptr<ir::Model> &_model;
TrixMetaReader _meta;
};
@@ -154,6 +156,8 @@ void TrixLoader::loadBulk(ir::Graph &subg)
{
ir::operation::Bulk::Param param;
param.binary_path = _model_path;
+ param.origin_input_shapes = _origin_input_shapes;
+ param.origin_output_shapes = _origin_output_shapes;
ir::OperandIndexSequence inputs;
ir::OperandIndexSequence outputs;
@@ -175,6 +179,7 @@ ir::OperandIndex TrixLoader::loadOperandFromInput(uint32_t idx, ir::Graph &subg)
ir::TypeInfo type_info(toDataType(_meta.input_seg_quant_type(idx)),
_meta.input_seg_quant_scale(idx), _meta.input_seg_quant_zp(idx));
+ _origin_input_shapes.push_back(shape);
// Create operand
const auto operand_index = subg.addOperand(shape, type_info);
return operand_index;
@@ -191,6 +196,7 @@ ir::OperandIndex TrixLoader::loadOperandFromOutput(uint32_t idx, ir::Graph &subg
ir::TypeInfo type_info(toDataType(_meta.output_seg_quant_type(idx)),
_meta.output_seg_quant_scale(idx), _meta.output_seg_quant_zp(idx));
+ _origin_output_shapes.push_back(shape);
// Create operand
const auto operand_index = subg.addOperand(shape, type_info);
return operand_index;
@@ -237,15 +243,13 @@ std::unique_ptr<ir::Graph> TrixLoader::loadSubgraph()
return subg;
}
-void TrixLoader::loadSubgraphs()
+void TrixLoader::loadModel()
{
// one subgraph only
auto subg = loadSubgraph();
- _subgraphs->push(ir::SubgraphIndex(0), std::move(subg));
+ _model->push(ir::SubgraphIndex(0), std::move(subg));
}
-void TrixLoader::loadModel() { loadSubgraphs(); }
-
void TrixLoader::loadFromFile(const std::string &file_path)
{
// model path will be used to set Bulk param
@@ -255,12 +259,12 @@ void TrixLoader::loadFromFile(const std::string &file_path)
loadModel();
}
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename)
+std::unique_ptr<ir::Model> loadModel(const std::string &filename)
{
- auto subgraphs = std::make_unique<ir::Subgraphs>();
- TrixLoader loader(subgraphs);
+ auto model = std::make_unique<ir::Model>();
+ TrixLoader loader(model);
loader.loadFromFile(filename);
- return subgraphs;
+ return model;
}
} // namespace trix_loader
} // namespace onert
diff --git a/runtime/onert/frontend/trix/src/trix_loader_dummy.cc b/runtime/onert/frontend/trix/src/trix_loader_dummy.cc
index 9fc8e1ff2..eecbd2217 100644
--- a/runtime/onert/frontend/trix/src/trix_loader_dummy.cc
+++ b/runtime/onert/frontend/trix/src/trix_loader_dummy.cc
@@ -22,10 +22,10 @@ namespace onert
{
namespace trix_loader
{
-std::unique_ptr<ir::Subgraphs> loadModel(const std::string &)
+std::unique_ptr<ir::Model> loadModel(const std::string &)
{
- auto subgraphs = std::make_unique<ir::Subgraphs>();
- return subgraphs;
+ auto model = std::make_unique<ir::Model>();
+ return model;
}
} // namespace trix_loader
} // namespace onert
diff --git a/runtime/onert/test/CMakeLists.txt b/runtime/onert/test/CMakeLists.txt
deleted file mode 100644
index 38899976d..000000000
--- a/runtime/onert/test/CMakeLists.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-set(TEST_ONERT test_onert)
-
-file(GLOB_RECURSE TESTS "*.cc")
-
-add_executable(${TEST_ONERT} ${TESTS})
-
-target_include_directories(${TEST_ONERT} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../core/src)
-
-target_link_libraries(${TEST_ONERT} onert_core)
-target_link_libraries(${TEST_ONERT} gtest)
-target_link_libraries(${TEST_ONERT} gtest_main)
-target_link_libraries(${TEST_ONERT} ${LIB_PTHREAD} dl)
-add_test(${TEST_ONERT} ${TEST_ONERT})
-
-install(TARGETS ${TEST_ONERT} DESTINATION unittest_standalone)
diff --git a/runtime/onert/test/core/compiler/HEScheduler.cc b/runtime/onert/test/core/compiler/HEScheduler.cc
deleted file mode 100644
index 514c01485..000000000
--- a/runtime/onert/test/core/compiler/HEScheduler.cc
+++ /dev/null
@@ -1,573 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <compiler/HEScheduler.h>
-#include <exec/ExecTime.h>
-
-#include <ir/Shape.h>
-#include <ir/InternalType.h>
-#include <ir/TypeInfo.h>
-#include <ir/DataType.h>
-
-#include <ir/operation/BinaryArithmetic.h>
-#include <ir/operation/FullyConnected.h>
-
-#include <gtest/gtest.h>
-
-namespace
-{
-using namespace onert;
-using namespace ir;
-using namespace backend;
-using namespace operation;
-using namespace exec;
-
-//
-// Mock backends classes
-//
-
-struct MockConfigCPU : public IConfig
-{
- std::string id() override { return "cpu"; }
- bool initialize() override { return true; };
- bool supportPermutation() override { return false; }
- Layout supportLayout(const Operation &, Layout) override { return Layout::UNKNOWN; }
- bool supportDynamicTensor() override { return false; }
- bool supportFP16() override { return false; }
-};
-
-class MockBackendContext : public BackendContext
-{
-public:
- using BackendContext::BackendContext;
- ITensorRegistry *genTensors() override { return nullptr; }
- FunctionMap genKernels() override { return {}; }
-};
-
-struct MockBackendCPU : public Backend
-{
- std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigCPU>(); }
- std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
- {
- return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
- }
-};
-
-struct MockConfigGPU : public IConfig
-{
- std::string id() override { return "gpu"; }
- bool initialize() override { return true; };
- bool supportPermutation() override { return false; }
- ir::Layout supportLayout(const ir::Operation &, ir::Layout) override
- {
- return ir::Layout::UNKNOWN;
- }
- bool supportDynamicTensor() override { return false; }
- bool supportFP16() override { return false; }
-};
-
-struct MockBackendGPU : public Backend
-{
- std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigGPU>(); }
- std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
- {
- return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
- }
-};
-
-struct MockConfigNPU : public IConfig
-{
- std::string id() override { return "npu"; }
- bool initialize() override { return true; };
- bool supportPermutation() override { return false; }
- ir::Layout supportLayout(const ir::Operation &, ir::Layout) override
- {
- return ir::Layout::UNKNOWN;
- }
- bool supportDynamicTensor() override { return false; }
- bool supportFP16() override { return false; }
-};
-
-struct MockBackendNPU : public Backend
-{
- std::shared_ptr<IConfig> config() const override { return std::make_shared<MockConfigNPU>(); }
- std::unique_ptr<BackendContext> newContext(ContextData &&data) const override
- {
- return std::make_unique<MockBackendContext>(this, std::move(data), nullptr);
- }
-};
-
-//
-// Constants
-//
-
-const int OPERAND_ELEMS = 268203;
-const int OPERAND_SIZE = OPERAND_ELEMS * 4;
-const int OPERATION_SIZE = OPERAND_SIZE * 3;
-
-const std::string LINEAR("Linear");
-const std::string DATAFLOW("Dataflow");
-const std::string PARALLEL("Parallel");
-
-//
-// Helper functions
-//
-
-// Set executor through environment variable
-void setExecutor(const std::string &executor) { setenv("EXECUTOR", executor.c_str(), true); }
-
-// Set profiling mode through environment variable
-void setProfilingMode(const bool value) { setenv("PROFILING_MODE", value ? "1" : "0", true); }
-
-// Calculate operation size by addition sizes of all input and output operands
-uint32_t calcOpSize(const std::shared_ptr<Graph> &graph, const OperationIndex &op_idx)
-{
- uint32_t size = 0;
- const auto &op = graph->operations().at(op_idx);
- for (const auto &ind : op.getInputs() + op.getOutputs())
- size += graph->operands().at(ind).info().total_size();
- return size;
-}
-
-// Set execution operation time. This method is needed since ExecutionTime has only
-// 'updateOperationExecTime' method.
-void setOperationExecTime(ExecTime &et, const Backend *backend, const std::string &operation,
- bool quant, uint32_t op_size, int64_t time)
-{
- // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
- assert(time > 0);
- int64_t prev_time = et.getOperationExecTime(backend, operation, quant, op_size);
- int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
- et.updateOperationExecTime(backend, operation, quant, op_size, time_to_set);
- assert(et.getOperationExecTime(backend, operation, quant, op_size) == time);
-}
-
-// Set same execution time for all given backends/operations
-void setOperationsExecutionTime(const std::vector<const Backend *> &backends,
- const std::vector<std::string> &op_names,
- const std::vector<uint32_t> &op_sizes, int64_t exec_time)
-{
- assert(op_names.size() == op_sizes.size());
- ExecTime et(backends);
- for (int i = 0; i < op_names.size(); ++i)
- {
- for (auto &backend : backends)
- setOperationExecTime(et, backend, op_names[i], false, op_sizes[i], exec_time);
- }
- et.storeOperationsExecTime();
-}
-
-// Set permute time from one backend to another. This method is needed since ExecutionTime has only
-// 'updatePermuteTime' method.
-void setPermutationTime(ExecTime &et, const Backend *from_backend, const Backend *to_backend,
- bool quant, uint32_t op_size, int64_t time)
-{
- // You shouldn't set negative time with this method since nnfw JSON deserializer can't read it
- assert(time > 0);
- int64_t prev_time = et.getPermuteTime(from_backend, to_backend, quant, op_size);
- int64_t time_to_set = prev_time == ExecTime::NOT_FOUND ? time : 2 * time - prev_time;
- et.updatePermuteTime(from_backend, to_backend, quant, op_size, time_to_set);
- assert(et.getPermuteTime(from_backend, to_backend, quant, op_size) == time);
-}
-
-// Set same permutation time between all given backends
-void setPermutationsExecutionTime(const std::vector<const Backend *> &backends,
- const int operand_size, const int64_t exec_time)
-{
- ExecTime et(backends);
- for (const auto &backend : backends)
- {
- for (auto &other_backend : backends)
- {
- if (backend == other_backend)
- continue;
- setPermutationTime(et, backend, other_backend, false, operand_size, exec_time);
- }
- }
- et.storeOperationsExecTime();
-}
-
-//
-// Functions for creating graphs
-//
-
-using OIS = OperandIndexSequence;
-
-template <typename NodeT, typename... Types>
-OperationIndex create(std::shared_ptr<Graph> graph, Types &&... args)
-{
- auto op = std::make_unique<NodeT>(std::forward<Types>(args)...);
- auto op_idx = graph->addOperation(std::move(op));
- // For now in scheduler test all operations in tested graphs has same size (for simplicity)
- assert(calcOpSize(graph, op_idx) == OPERATION_SIZE);
- return op_idx;
-}
-
-// Create straight graph: Add->Sub->Mul
-std::shared_ptr<Graph> createStraightGraph()
-{
- auto graph = std::make_shared<Graph>();
- const TypeInfo float_op(DataType::FLOAT32);
-
- // Create add node
- auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
- create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
-
- // Create sub node
- auto sub_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
- create<BinaryArithmetic>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx}, sub_op_params);
-
- // Create mul node
- auto mul_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto mul_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- BinaryArithmetic::Param mul_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
- create<BinaryArithmetic>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx}, mul_op_params);
-
- graph->verify();
- return graph;
-}
-
-/* Create branched graph:
- * [Add]
- * // \\
- * [Mul1] [FC2]
- * || ||
- * [Mul2] [FC2]
- * \\ //
- * [Sub]
- */
-std::shared_ptr<Graph> createBranchedGraph()
-{
- auto graph = std::make_shared<Graph>();
- const TypeInfo float_op(DataType::FLOAT32);
-
- // Create add node
- auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
- create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);
-
- // Create mul1 node
- auto mul1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto mul1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- BinaryArithmetic::Param mul1_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
- create<BinaryArithmetic>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx},
- mul1_op_params);
-
- // Create mul2 node
- auto mul2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto mul2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- BinaryArithmetic::Param mul2_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
- create<BinaryArithmetic>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx},
- mul2_op_params);
-
- // Create fc1 node
- auto fc1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto fc1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- FullyConnected::Param fc1_op_params{Activation::NONE};
- create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx}, fc1_op_params);
-
- // Create fc2 node
- auto fc2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- auto fc2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- FullyConnected::Param fc2_op_params{Activation::NONE};
- create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx}, fc2_op_params);
-
- // Create sub node
- auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
- BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
- create<BinaryArithmetic>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx}, sub_op_params);
-
- graph->verify();
- return graph;
-}
-
-//
-// Tests setup/teardown
-//
-
-// SetUp/TearDown methods runs before/after each test and performs actions common for each test
-class HESchedulerTest : public ::testing::Test
-{
-protected:
- void SetUp() override
- {
- // Initialize mock backends
- _cpu_backend = new MockBackendCPU();
- _gpu_backend = new MockBackendGPU();
- _npu_backend = new MockBackendNPU();
- _mock_backends = {_cpu_backend, _gpu_backend, _npu_backend};
-
- // Remove previous profile data if it exists
- if (!remove("exec_time.json"))
- {
- // DO NOTHING (no profile data)
- }
-
- // Remember original value of 'EXECUTOR' environment variable
- char *executor = std::getenv("EXECUTOR");
- _original_executor = executor == nullptr ? "" : executor;
-
- // Remember original value of 'PROFILING_MODE' environment variable
- char *profiling_mode = std::getenv("PROFILING_MODE");
- _original_profiling_mode = profiling_mode == nullptr ? "" : profiling_mode;
- }
-
- void TearDown() override
- {
- delete _cpu_backend;
- delete _gpu_backend;
- delete _npu_backend;
- EXPECT_EQ(remove("exec_time.json"), 0);
- setenv("EXECUTOR", _original_executor.c_str(), true);
- setenv("PROFILING_MODE", _original_profiling_mode.c_str(), true);
- }
-
- const MockBackendCPU *_cpu_backend{nullptr};
- const MockBackendGPU *_gpu_backend{nullptr};
- const MockBackendNPU *_npu_backend{nullptr};
- std::vector<const Backend *> _mock_backends;
-
- std::string _original_executor;
- std::string _original_profiling_mode;
-};
-
-//
-// HEScheduler tests
-//
-
-class HESchedulerTestWithExecutorParam : public HESchedulerTest,
- public testing::WithParamInterface<std::string>
-{
-};
-
-// SchedulerTestWithExecutorParam tests are parameterized with executor name and runs three times -
-// one time for each executor
-INSTANTIATE_TEST_CASE_P(AllExecutors, HESchedulerTestWithExecutorParam,
- testing::Values(LINEAR, DATAFLOW, PARALLEL));
-
-// Test scheduler behavior for straight graph with known execution time of all nodes and permutes.
-TEST_P(HESchedulerTestWithExecutorParam, straight_graph_known_exec_time)
-{
- setExecutor(GetParam());
-
- // Prepare graph
- ir::Subgraphs subgs;
- auto graph(createStraightGraph());
- subgs.push(ir::SubgraphIndex{0}, graph);
- OperationIndex add_op_idx(0), sub_op_idx(1), mul_op_idx(2);
-
- // Set default execution and transfer time
- setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1);
- setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul"},
- {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
-
- // Test 1
- // Expected behaviour: scheduler assigns different backend to each node
- {
- // For each backend reduce execution time of one node
- ExecTime et(_mock_backends);
- setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, 1);
- setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, 1);
- setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, 1);
- et.storeOperationsExecTime();
-
- // Test scheduler
- auto scheduler =
- compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
- const auto br = scheduler.schedule(*graph);
- ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
- ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "gpu");
- ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "npu");
- }
-
- // Test 2
- // Expected behaviour: scheduler assigns single backend to all nodes because of big transfer time
- {
- // Increase transfer time
- setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1e5);
-
- // Test scheduler
- auto scheduler =
- compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
- const auto br = scheduler.schedule(*graph);
- ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "cpu");
- ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
- ASSERT_EQ(br->getBackend(mul_op_idx)->config()->id(), "cpu");
- }
-}
-
-// Test scheduler behavior for branched graph with known execution time of all nodes and permutes
-TEST_P(HESchedulerTestWithExecutorParam, branched_graph_known_exec_time)
-{
- const int64_t NPU_ET = 5000;
- setExecutor(GetParam());
-
- // Prepare graph
- ir::Subgraphs subgs;
- auto graph(createBranchedGraph());
- subgs.push(ir::SubgraphIndex{0}, graph);
- OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
- sub_op_idx(5);
-
- // Set default execution and transfer time
- setPermutationsExecutionTime(_mock_backends, OPERAND_SIZE, 1000);
- setOperationsExecutionTime(_mock_backends, {"Add", "Sub", "Mul", "FullyConnected"},
- {OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE, OPERATION_SIZE}, 1e4);
-
- // Test 1
- // Expected behaviour: for dataflow and linear executors scheduler assigns fastest backend to all
- // nodes, in case of parallel executor scheduler assigns different backends to branches.
- {
- // Reduce execution time
- ExecTime et(_mock_backends);
- setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, NPU_ET);
- setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, NPU_ET);
- setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, NPU_ET);
- setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET);
- setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET + 1000);
- setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET + 1000);
- et.storeOperationsExecTime();
-
- // Test scheduler
- auto scheduler =
- compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
- const auto br = scheduler.schedule(*graph);
-
- std::string branch1_expected_backend("npu"), branch2_expected_backend("npu");
- if (GetParam() == PARALLEL)
- {
- branch1_expected_backend =
- br->getBackend(mul1_op_idx)->config()->id() == "npu" ? "npu" : "gpu";
- branch2_expected_backend = branch1_expected_backend == "npu" ? "gpu" : "npu";
- }
-
- ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), branch1_expected_backend);
- ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), branch1_expected_backend);
- ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), branch2_expected_backend);
- ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), branch2_expected_backend);
- ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
- }
-
- // Test 2
- // Expected behaviour: scheduler assigns single backend to all nodes
- {
- // Increase execution time for GPU backend
- ExecTime et(_mock_backends);
- /* for parallel executor: set a time, that is larger than sum_of_other_branches_nodes_cnt *
- * npu_exec_time so that npu is prefered: the ith branch will wait for npu until it finishes the
- * [0;i-1] branches nodes in DFS order. In each branch it goes deep intul doesn't encounter
- * branching or scheduler assigns another backend to a node*/
- setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, NPU_ET * 3 + 1);
- setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, NPU_ET * 3 + 1);
- et.storeOperationsExecTime();
-
- // Test scheduler
- auto scheduler =
- compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
- const auto br = scheduler.schedule(*graph);
- ASSERT_EQ(br->getBackend(add_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "npu");
- }
-}
-
-// Test scheduler behavior for branched graph and enabled profiling mode
-TEST_F(HESchedulerTest, branched_graph_profiling_mode)
-{
- const int ET = 1e5;
-
- // Turn on profiling mode
- setProfilingMode(true);
- setExecutor(DATAFLOW);
-
- // Prepare graph
- ir::Subgraphs subgs;
- auto graph(createBranchedGraph());
- subgs.push(ir::SubgraphIndex{0}, graph);
- OperationIndex add_op_idx(0), mul1_op_idx(1), mul2_op_idx(2), fc1_op_idx(3), fc2_op_idx(4),
- sub_op_idx(5);
-
- // Test 1
- // Expected behaviour: scheduler assigns backends to nodes with unknown execution time
- {
- // Set execution time for all backends/nodes except for cpu/Sub, npu/Mul, gpu/FC
- ExecTime et(_mock_backends);
- setOperationExecTime(et, _cpu_backend, "Add", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _cpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
- setOperationExecTime(et, _cpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _npu_backend, "Add", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _npu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _npu_backend, "Sub", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _gpu_backend, "Add", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _gpu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
- setOperationExecTime(et, _gpu_backend, "Sub", false, OPERATION_SIZE, ET);
- et.storeOperationsExecTime();
-
- // Test scheduler
- auto scheduler =
- compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
- const auto br = scheduler.schedule(*graph);
- ASSERT_EQ(br->getBackend(mul1_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(mul2_op_idx)->config()->id(), "npu");
- ASSERT_EQ(br->getBackend(fc1_op_idx)->config()->id(), "gpu");
- ASSERT_EQ(br->getBackend(fc2_op_idx)->config()->id(), "gpu");
- ASSERT_EQ(br->getBackend(sub_op_idx)->config()->id(), "cpu");
- }
-
- // Test 2
- // Expected behaviour: scheduler shuffling backends, so different backends are assigned to
- // neighbor nodes
- {
- // Set execution time for rest backends/nodes (cpu/Sub, npu/Mul, gpu/FC)
- ExecTime et(_mock_backends);
- setOperationExecTime(et, _cpu_backend, "Sub", false, OPERATION_SIZE, ET);
- setOperationExecTime(et, _npu_backend, "Mul", false, OPERATION_SIZE, ET + 1);
- setOperationExecTime(et, _gpu_backend, "FullyConnected", false, OPERATION_SIZE, ET);
- et.storeOperationsExecTime();
-
- // Test scheduler
- auto scheduler =
- compiler::HEScheduler(_mock_backends, compiler::fetchCompilerOptionsFromGlobalConfig(subgs));
- const auto br = scheduler.schedule(*graph);
- ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
- br->getBackend(mul1_op_idx)->config()->id());
- ASSERT_NE(br->getBackend(add_op_idx)->config()->id(),
- br->getBackend(fc1_op_idx)->config()->id());
- ASSERT_NE(br->getBackend(mul1_op_idx)->config()->id(),
- br->getBackend(mul2_op_idx)->config()->id());
- ASSERT_NE(br->getBackend(fc1_op_idx)->config()->id(),
- br->getBackend(fc2_op_idx)->config()->id());
- ASSERT_NE(br->getBackend(mul2_op_idx)->config()->id(),
- br->getBackend(sub_op_idx)->config()->id());
- ASSERT_NE(br->getBackend(fc2_op_idx)->config()->id(),
- br->getBackend(sub_op_idx)->config()->id());
- }
-}
-
-// TODO: Add tests with unknown execution and permutation time
-
-} // unnamed namespace
diff --git a/runtime/onert/test/core/compiler/pass/UnusedOperandEliminationPass.cc b/runtime/onert/test/core/compiler/pass/UnusedOperandEliminationPass.cc
deleted file mode 100644
index b18dedd15..000000000
--- a/runtime/onert/test/core/compiler/pass/UnusedOperandEliminationPass.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-#include "ir/Graph.h"
-#include "compiler/pass/UnusedOperandEliminationPass.h"
-
-using namespace onert::ir;
-using namespace onert::compiler::pass;
-
-TEST(UnusedOperandEliminationPass, Simple)
-{
- Graph graph;
-
- // Add tensors
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::FLOAT32};
- auto in = graph.addOperand(shape, type);
- auto out = graph.addOperand(shape, type);
-
- auto unused = graph.addOperand(shape, type);
-
- // Set model inputs/outputs
- graph.addInput(in);
- graph.addOutput(out);
-
- UnusedOperandEliminationPass{graph}.run();
-
- ASSERT_TRUE(graph.operands().exist(in));
- ASSERT_TRUE(graph.operands().exist(out));
- ASSERT_FALSE(graph.operands().exist(unused));
-}
diff --git a/runtime/onert/test/core/exec/ExecInstance.cc b/runtime/onert/test/core/exec/ExecInstance.cc
deleted file mode 100644
index 0183b6276..000000000
--- a/runtime/onert/test/core/exec/ExecInstance.cc
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-#include <thread>
-
-#include "ir/Graph.h"
-#include "compiler/Compiler.h"
-#include "exec/Execution.h"
-#include "ir/operation/BinaryArithmetic.h"
-#include "util/TracingCtx.h"
-
-namespace
-{
-
-using namespace onert::ir;
-
-class CompiledMockUpModel
-{
-public:
- CompiledMockUpModel()
- {
- // Model: two elementwise add operation
- // model input: lhs, rhs1
- // model output: second add result (result2)
- // constant: rhs2
- // result1 <= (lhs + rhs)
- // result2 <= (result1 + rhs2)
- // lhs, rhs1, rh2, result1, result2 shape: {1, 2, 2, 1}
- // activation: none (constant)
- graph = std::make_shared<Graph>();
- // 1st add operands (result1 <= lhs + rhs1)
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::FLOAT32};
- static float rhs2_data[4] = {3, 1, -1, 5};
- auto operand_lhs = graph->addOperand(shape, type);
- auto operand_rhs1 = graph->addOperand(shape, type);
- auto operand_result1 = graph->addOperand(shape, type);
- auto operand_rhs2 = graph->addOperand(shape, type);
- auto operand_result2 = graph->addOperand(shape, type);
- graph->operands()
- .at(operand_rhs2)
- .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
- // 2nd add operations (result2 <= result1 + rhs2)
- operation::BinaryArithmetic::Param param1;
- param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param1.activation = Activation::NONE;
- auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
- auto output_set1 = OperandIndexSequence{operand_result1};
- graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
- operation::BinaryArithmetic::Param param2;
- param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param2.activation = Activation::NONE;
- auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
- auto output_set2 = OperandIndexSequence{operand_result2};
- graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
- // Identify model inputs and outputs
- graph->addInput(operand_lhs);
- graph->addInput(operand_rhs1);
- graph->addOutput(operand_result2);
- graph->verify();
-
- // Compile
- auto subgs = std::make_shared<onert::ir::Subgraphs>();
- subgs->push(onert::ir::SubgraphIndex{0}, graph);
- tracing_ctx = std::make_unique<onert::util::TracingCtx>(subgs.get());
- onert::compiler::Compiler compiler{subgs, tracing_ctx.get()};
- executors = compiler.compile();
- }
-
-public:
- std::shared_ptr<Graph> graph;
- std::shared_ptr<onert::exec::ExecutorMap> executors;
- std::unique_ptr<onert::util::TracingCtx> tracing_ctx;
-};
-
-TEST(ExecInstance, simple)
-{
- auto mockup = CompiledMockUpModel();
- auto graph = mockup.graph;
- auto executors = mockup.executors;
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto output = IOIndex{0};
-
- const float input1_buffer[4] = {1, 0, -1, -2};
- const float input2_buffer[4] = {1, -3, 2, -4};
- float output_buffer[4] = {};
- const float output_expected[4] = {5, -2, 0, -1};
-
- onert::exec::Execution execution{executors};
-
- execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
- execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
- execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
- execution.execute();
-
- for (auto i = 0; i < 4; i++)
- {
- EXPECT_EQ(output_buffer[i], output_expected[i]);
- }
-}
-
-TEST(ExecInstance, twoCompile)
-{
- auto mockup = CompiledMockUpModel();
- auto graph = mockup.graph;
- auto executors1 = mockup.executors;
- onert::exec::Execution execution1{executors1};
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto output = IOIndex{0};
-
- const float exe1_input1_buffer[4] = {1, 0, -1, -2};
- const float exe1_input2_buffer[4] = {1, -3, 2, -4};
- float exe1_output_buffer[4] = {};
- const float exe1_output_expected[4] = {5, -2, 0, -1};
-
- execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
- execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
- execution1.setOutput(output, reinterpret_cast<void *>(exe1_output_buffer), 16);
-
- // Make new executor: compile again
- auto subgs = std::make_shared<onert::ir::Subgraphs>();
- subgs->push(onert::ir::SubgraphIndex{0}, graph);
- auto tracing_ctx = std::make_unique<onert::util::TracingCtx>(subgs.get());
- onert::compiler::Compiler compiler{subgs, tracing_ctx.get()};
- std::shared_ptr<onert::exec::ExecutorMap> executors2 = compiler.compile();
- onert::exec::Execution execution2{executors2};
-
- const float exe2_input1_buffer[4] = {2, 1, -2, 0};
- const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
- float exe2_output_buffer[4] = {};
- const float exe2_output_expected[4] = {2, 5, -2, 7};
-
- execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
- execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
- execution2.setOutput(output, reinterpret_cast<void *>(exe2_output_buffer), 16);
-
- execution1.execute();
- execution2.execute();
-
- for (auto i = 0; i < 4; i++)
- {
- EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
- EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
- }
-}
-
-// Support two initialized execution instance then ordered execution
-TEST(ExecInstance, twoExecution)
-{
- auto mockup = CompiledMockUpModel();
- auto executors = mockup.executors;
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto output1 = IOIndex{0};
-
- const float exe1_input1_buffer[4] = {1, 0, -1, -2};
- const float exe1_input2_buffer[4] = {1, -3, 2, -4};
- float exe1_output_buffer[4] = {};
- const float exe1_output_expected[4] = {5, -2, 0, -1};
- const float exe2_output_expected[4] = {2, 5, -2, 7};
-
- onert::exec::Execution execution1{executors};
- execution1.setInput(input1, reinterpret_cast<const void *>(exe1_input1_buffer), 16);
- execution1.setInput(input2, reinterpret_cast<const void *>(exe1_input2_buffer), 16);
- execution1.setOutput(output1, reinterpret_cast<void *>(exe1_output_buffer), 16);
-
- const float exe2_input1_buffer[4] = {2, 1, -2, 0};
- const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
- float exe2_output_buffer[4] = {};
-
- // Make new execution
- onert::exec::Execution execution2{executors};
- execution2.setInput(input1, reinterpret_cast<const void *>(exe2_input1_buffer), 16);
- execution2.setInput(input2, reinterpret_cast<const void *>(exe2_input2_buffer), 16);
- execution2.setOutput(output1, reinterpret_cast<void *>(exe2_output_buffer), 16);
-
- execution1.execute();
- execution2.execute();
-
- for (auto i = 0; i < 4; i++)
- {
- EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
- EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
- }
-}
-
-class Inference
-{
-public:
- Inference(const float (&input1)[4], const float (&input2)[4], float (&output)[4],
- std::shared_ptr<onert::exec::ExecutorMap> &executors)
- : _input1{input1}, _input2{input2}, _output{output}, _executors{executors}
- {
- // DO NOTHING
- }
-
- void inference(void)
- {
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto output1 = IOIndex{0};
-
- onert::exec::Execution execution{_executors};
- execution.setInput(input1, reinterpret_cast<const void *>(_input1), 16);
- execution.setInput(input2, reinterpret_cast<const void *>(_input2), 16);
- execution.setOutput(output1, reinterpret_cast<void *>(_output), 16);
-
- execution.execute();
- }
-
-private:
- const float (&_input1)[4];
- const float (&_input2)[4];
- float (&_output)[4];
- std::shared_ptr<onert::exec::ExecutorMap> &_executors;
-};
-
-// Support multi-thread execution
-TEST(ExecInstance, twoThreads)
-{
- auto mockup = CompiledMockUpModel();
- auto executors = mockup.executors;
-
- const float exe1_input1_buffer[4] = {1, 0, -1, -2};
- const float exe1_input2_buffer[4] = {1, -3, 2, -4};
- float exe1_output_buffer[4] = {};
- const float exe1_output_expected[4] = {5, -2, 0, -1};
-
- Inference execution1{exe1_input1_buffer, exe1_input2_buffer, exe1_output_buffer, executors};
-
- const float exe2_input1_buffer[4] = {2, 1, -2, 0};
- const float exe2_input2_buffer[4] = {-3, 3, 1, 2};
- float exe2_output_buffer[4] = {};
- const float exe2_output_expected[4] = {2, 5, -2, 7};
-
- Inference execution2{exe2_input1_buffer, exe2_input2_buffer, exe2_output_buffer, executors};
-
- std::thread t1{&Inference::inference, &execution1};
- std::thread t2{&Inference::inference, &execution2};
-
- t1.join();
- t2.join();
-
- for (auto i = 0; i < 4; i++)
- {
- EXPECT_EQ(exe1_output_buffer[i], exe1_output_expected[i]);
- EXPECT_EQ(exe2_output_buffer[i], exe2_output_expected[i]);
- }
-}
-
-// Support asynchronous execution
-TEST(ExecInstance, async)
-{
- auto mockup = CompiledMockUpModel();
- auto graph = mockup.graph;
- auto executors = mockup.executors;
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto output = IOIndex{0};
-
- const float input1_buffer[4] = {1, 0, -1, -2};
- const float input2_buffer[4] = {1, -3, 2, -4};
- float output_buffer[4] = {};
- const float output_expected[4] = {5, -2, 0, -1};
-
- onert::exec::Execution execution{executors};
-
- execution.setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16);
- execution.setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16);
- execution.setOutput(output, reinterpret_cast<void *>(output_buffer), 16);
- execution.startExecute();
- execution.waitFinish();
-
- for (auto i = 0; i < 4; i++)
- {
- EXPECT_EQ(output_buffer[i], output_expected[i]);
- }
-}
-
-} // namespace
diff --git a/runtime/onert/test/core/exec/ExecTime.test.cc b/runtime/onert/test/core/exec/ExecTime.test.cc
deleted file mode 100644
index 178b61ea5..000000000
--- a/runtime/onert/test/core/exec/ExecTime.test.cc
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "exec/ExecTime.h"
-#include "backend/IConfig.h"
-#include "backend/Backend.h"
-#include <gtest/gtest.h>
-#include <string>
-
-namespace
-{
-using namespace onert;
-using namespace exec;
-using namespace backend;
-
-struct MockConfig : public IConfig
-{
- std::string id() override { return "b1"; }
- bool initialize() override { return true; };
- bool supportPermutation() override { return false; }
- ir::Layout supportLayout(const ir::Operation &, ir::Layout) override
- {
- return ir::Layout::UNKNOWN;
- }
- bool supportDynamicTensor() override { return false; }
- bool supportFP16() override { return false; }
-};
-
-struct MockBackend : public ::onert::backend::Backend
-{
- std::shared_ptr<onert::backend::IConfig> config() const override
- {
- return std::make_shared<MockConfig>();
- }
- std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&) const override
- {
- return nullptr;
- }
-};
-
-TEST(ExecTime, roundtrip_ok)
-{
- const auto *b = new MockBackend();
- std::vector<const Backend *> bs = {b};
- {
- ExecTime et(bs);
- et.updateOperationExecTime(b, "op1", true, 100, 100);
- et.updateOperationExecTime(b, "op1", true, 200, 200);
- et.updateOperationExecTime(b, "op1", false, 100, 888);
- et.storeOperationsExecTime();
- }
- {
- ExecTime et(bs);
- auto time = et.getOperationExecTime(b, "op1", true, 100);
- ASSERT_EQ(time, 100);
- // Check interpolation
- time = et.getOperationExecTime(b, "op1", true, 150);
- ASSERT_EQ(time, 150);
- time = et.getOperationExecTime(b, "op1", false, 100);
- ASSERT_EQ(time, 888);
- et.storeOperationsExecTime();
- }
- // clean up
- EXPECT_EQ(remove("exec_time.json"), 0);
-}
-
-TEST(ExecTime, structure)
-{
-
- const auto *b = new MockBackend();
- std::vector<const Backend *> bs = {b};
- {
- ExecTime et(bs);
- et.updateOperationExecTime(b, "op1", true, 100, 100);
- et.updateOperationExecTime(b, "op1", true, 200, 200);
- et.storeOperationsExecTime();
- }
- {
- ExecTime et(bs);
- auto time = et.getOperationExecTime(b, "op1", true, 100);
- ASSERT_EQ(time, 100);
- // Check interpolation
- time = et.getOperationExecTime(b, "op1", true, 200);
- ASSERT_EQ(time, 200);
- et.storeOperationsExecTime();
- }
- // clean up
- EXPECT_EQ(remove("exec_time.json"), 0);
-}
-} // unnamed namespace
diff --git a/runtime/onert/test/core/interp/ExecManager.cc b/runtime/onert/test/core/interp/ExecManager.cc
deleted file mode 100644
index a9f7cd46a..000000000
--- a/runtime/onert/test/core/interp/ExecManager.cc
+++ /dev/null
@@ -1,360 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include <memory>
-
-#include "ir/Graph.h"
-#include "interp/InterpExecutor.h"
-#include "exec/Execution.h"
-#include "ir/operation/BinaryArithmetic.h"
-
-namespace
-{
-
-using namespace onert::ir;
-using InterpExecutor = onert::interp::InterpExecutor;
-using Execution = onert::exec::Execution;
-using ExecutorMap = onert::exec::ExecutorMap;
-
-class InterpExecutorTest : public ::testing::Test
-{
-protected:
- virtual void SetUp() {}
- void CreateSimpleModel()
- {
- // Model: one elementwise add operation
- // model input: lhs, rhs
- // model output: add result
- // lhs, rhs, result shape: {1, 2, 2, 1}
- // activation: none (constant)
- _graph = std::make_unique<Graph>();
-
- // Add operands
-
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::INT32};
- Shape shape_scalar(0);
- TypeInfo type_scalar{DataType::INT32};
-
- auto operand_lhs = _graph->addOperand(shape, type);
- auto operand_rhs = _graph->addOperand(shape, type);
- auto operand_result = _graph->addOperand(shape, type);
-
- // Add operations
-
- operation::BinaryArithmetic::Param param;
- param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param.activation = Activation::NONE;
- auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
- auto output_set = OperandIndexSequence{operand_result};
- _graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
-
- // Identify model inputs and outputs
-
- _graph->getInputs().append(operand_lhs);
- _graph->getInputs().append(operand_rhs);
- _graph->getOutputs().append(operand_result);
-
- _graph->verify();
-
- auto subgs = std::make_shared<onert::ir::Subgraphs>();
- subgs->push(onert::ir::SubgraphIndex{0}, _graph);
- _graph->setSubgraphs(subgs);
-
- _executors = std::make_shared<ExecutorMap>();
- _executors->insert(
- std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
- }
-
- void CreateTwoStepModel()
- {
- // Model: two elementwise add operation
- // model input: lhs, rhs1
- // model output: second add result (result2)
- // constant: rhs2
- // result1 <= (lhs + rhs)
- // result2 <= (result1 + rhs2)
- // lhs, rhs1, rh2, result1, result2 shape: {1, 2, 2, 1}
- // activation: none (constant)
- _graph = std::make_unique<Graph>();
-
- // 1st add operands (result1 <= lhs + rhs1)
-
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::INT32};
- Shape shape_scalar(0);
- TypeInfo type_scalar{DataType::INT32};
-
- static int32_t rhs2_data[4] = {3, 1, -1, 5};
-
- auto operand_lhs = _graph->addOperand(shape, type);
- auto operand_rhs1 = _graph->addOperand(shape, type);
- auto operand_result1 = _graph->addOperand(shape, type);
- auto operand_rhs2 = _graph->addOperand(shape, type);
- auto operand_result2 = _graph->addOperand(shape, type);
- _graph->operands()
- .at(operand_rhs2)
- .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
-
- // 2nd add operations (result2 <= result1 + rhs2)
-
- operation::BinaryArithmetic::Param param1;
- param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param1.activation = Activation::NONE;
- auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
- auto output_set1 = OperandIndexSequence{operand_result1};
- _graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
-
- operation::BinaryArithmetic::Param param2;
- param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param2.activation = Activation::NONE;
- auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
- auto output_set2 = OperandIndexSequence{operand_result2};
- _graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
-
- // Identify model inputs and outputs
-
- _graph->getInputs().append(operand_lhs);
- _graph->getInputs().append(operand_rhs1);
- _graph->getOutputs().append(operand_result2);
-
- _graph->verify();
-
- auto subgs = std::make_shared<onert::ir::Subgraphs>();
- subgs->push(onert::ir::SubgraphIndex{0}, _graph);
- _graph->setSubgraphs(subgs);
-
- _executors = std::make_shared<ExecutorMap>();
- _executors->insert(
- std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
- }
-
- void CreateUnspecifiedDimensionsModel()
- {
- // Model: one elementwise add operation
- // model input: lhs, rhs
- // model output: add result
- // lhs, rhs, result shape: {1, unknown, 2, 1}
- // activation: none (constant)
- _graph = std::make_unique<Graph>();
-
- // Add operands
-
- Shape shape{1, 0, 2, 1};
- TypeInfo type{DataType::INT32};
- Shape shape_scalar(0);
- TypeInfo type_scalar{DataType::INT32};
-
- auto operand_lhs = _graph->addOperand(shape, type);
- auto operand_rhs = _graph->addOperand(shape, type);
-
- auto operand_activation = _graph->addOperand(shape_scalar, type_scalar);
- _graph->operands()
- .at(operand_activation)
- .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&_activation_value), 4));
-
- auto operand_result = _graph->addOperand(shape, type);
-
- // Add operations
-
- operation::BinaryArithmetic::Param param;
- param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param.activation = Activation::NONE;
- auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
- auto output_set = OperandIndexSequence{operand_result};
- _graph->addOperation(
- std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));
-
- // Identify model inputs and outputs
-
- _graph->getInputs().append(operand_lhs);
- _graph->getInputs().append(operand_rhs);
- _graph->getOutputs().append(operand_result);
-
- _graph->verify();
-
- auto subgs = std::make_shared<onert::ir::Subgraphs>();
- subgs->push(onert::ir::SubgraphIndex{0}, _graph);
- _graph->setSubgraphs(subgs);
-
- _executors = std::make_shared<ExecutorMap>();
- _executors->insert(
- std::make_pair(onert::ir::SubgraphIndex{0}, std::make_unique<InterpExecutor>(*_graph)));
- }
-
- void createExecution() { _execution = std::make_unique<Execution>(_executors); }
-
- virtual void TearDown() { _executors = nullptr; }
-
- std::shared_ptr<Graph> _graph{nullptr};
- std::shared_ptr<ExecutorMap> _executors{nullptr};
- std::unique_ptr<Execution> _execution{nullptr};
- const int32_t _activation_value{0};
-};
-
-TEST_F(InterpExecutorTest, create_empty)
-{
- Graph graph;
- graph.verify();
- auto executor = std::make_unique<InterpExecutor>(graph);
- ASSERT_NE(executor, nullptr);
-}
-
-TEST_F(InterpExecutorTest, create_simple)
-{
- CreateSimpleModel();
- ASSERT_NE(_executors, nullptr);
- ASSERT_NE(_executors->at(onert::ir::SubgraphIndex{0}), nullptr);
-}
-
-TEST_F(InterpExecutorTest, neg_setInput)
-{
- CreateSimpleModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
-
- EXPECT_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, neg_setOutput)
-{
- CreateSimpleModel();
- createExecution();
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- int32_t output_buffer[4] = {};
-
- EXPECT_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, neg_setInputForUnspecifiedDimensions)
-{
- CreateUnspecifiedDimensionsModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
-
- TypeInfo operand_type{DataType::INT32};
- Shape operand_shape{1, 2, 2, 1};
-
- EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape,
- reinterpret_cast<const void *>(input1_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setInput(input1, operand_type, operand_shape,
- reinterpret_cast<const void *>(input1_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setInput(input1, operand_type, operand_shape,
- reinterpret_cast<const void *>(input1_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, neg_setOutputForUnspecifiedDimensions)
-{
- CreateUnspecifiedDimensionsModel();
- createExecution();
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- TypeInfo operand_type{DataType::INT32};
- Shape operand_shape{1, 2, 2, 1};
-
- int32_t output_buffer[4] = {};
-
- EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape,
- reinterpret_cast<void *>(output_buffer), 4),
- std::runtime_error);
- EXPECT_THROW(_execution->setOutput(output, operand_type, operand_shape,
- reinterpret_cast<void *>(output_buffer), 12),
- std::runtime_error);
- EXPECT_NO_THROW(_execution->setOutput(output, operand_type, operand_shape,
- reinterpret_cast<void *>(output_buffer), 16));
-}
-
-TEST_F(InterpExecutorTest, execute)
-{
- CreateSimpleModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto input1_idx = _graph->getInputs().at(input1);
- auto input2_idx = _graph->getInputs().at(input2);
-
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
- const int32_t input2_buffer[4] = {1, -3, 2, -4};
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- int32_t output_buffer[4] = {};
-
- EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
- EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16));
- EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
- EXPECT_NO_THROW(_execution->execute());
- EXPECT_EQ(output_buffer[0], 2);
- EXPECT_EQ(output_buffer[1], -3);
- EXPECT_EQ(output_buffer[2], 1);
- EXPECT_EQ(output_buffer[3], -6);
-}
-
-TEST_F(InterpExecutorTest, executeTwoStep)
-{
- CreateTwoStepModel();
- createExecution();
-
- auto input1 = IOIndex{0};
- auto input2 = IOIndex{1};
- auto input1_idx = _graph->getInputs().at(input1);
- auto input2_idx = _graph->getInputs().at(input2);
-
- const int32_t input1_buffer[4] = {1, 0, -1, -2};
- const int32_t input2_buffer[4] = {1, -3, 2, -4};
-
- auto output = IOIndex{0};
- auto output_idx = _graph->getOutputs().at(output);
-
- int32_t output_buffer[4] = {};
-
- EXPECT_NO_THROW(_execution->setInput(input1, reinterpret_cast<const void *>(input1_buffer), 16));
- EXPECT_NO_THROW(_execution->setInput(input2, reinterpret_cast<const void *>(input2_buffer), 16));
- EXPECT_NO_THROW(_execution->setOutput(output, reinterpret_cast<void *>(output_buffer), 16));
- EXPECT_NO_THROW(_execution->execute());
- EXPECT_EQ(output_buffer[0], 5);
- EXPECT_EQ(output_buffer[1], -2);
- EXPECT_EQ(output_buffer[2], 0);
- EXPECT_EQ(output_buffer[3], -1);
-}
-
-} // namespace
diff --git a/runtime/onert/test/core/ir/Graph.cc b/runtime/onert/test/core/ir/Graph.cc
deleted file mode 100644
index d6de7c0cc..000000000
--- a/runtime/onert/test/core/ir/Graph.cc
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Graph.h"
-#include "ir/operation/BinaryArithmetic.h"
-#include "ir/verifier/Verifier.h"
-
-TEST(Graph, neg_inputs_and_outputs)
-{
- onert::ir::Graph graph;
-
- onert::ir::OperandIndex index0{0u};
- onert::ir::OperandIndex index1{1u};
-
- graph.addInput({index0});
- graph.addInput({index1});
-
- onert::ir::OperandIndex index10{10u};
- onert::ir::OperandIndex index11{11u};
- onert::ir::OperandIndex index12{12u};
-
- graph.addOutput({index10});
- graph.addOutput({index11});
- graph.addOutput({index12});
-
- ASSERT_EQ(graph.getInputs().size(), 2);
- ASSERT_EQ(graph.getOutputs().size(), 3);
-
- onert::ir::IOIndex io_index0{0};
- onert::ir::IOIndex io_index1{1};
- onert::ir::IOIndex io_index2{2};
-
- ASSERT_EQ(graph.getInputs().at(io_index0), 0);
- ASSERT_EQ(graph.getInputs().at(io_index1), 1);
-
- ASSERT_EQ(graph.getOutputs().at(io_index0), 10);
- ASSERT_EQ(graph.getOutputs().at(io_index1), 11);
- ASSERT_EQ(graph.getOutputs().at(io_index2), 12);
-
- EXPECT_THROW(graph.getOutputs().at(onert::ir::IOIndex{3}), std::out_of_range);
-}
-
-using namespace onert::ir;
-
-OperationIndex addAddOperation(Graph &graph, const OperandIndexSequence inputs,
- const OperandIndexSequence outputs)
-{
- // Add "ADD" operation
- operation::BinaryArithmetic::Param param;
- param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
- param.activation = Activation::NONE;
- return graph.addOperation(std::make_unique<operation::BinaryArithmetic>(inputs, outputs, param));
-}
-
-TEST(Graph, OneOpGraphSimpleValid)
-{
- // Simple Graph with just one Add operation
-
- Graph graph;
-
- // Add tensors
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::FLOAT32};
- auto lhs = graph.addOperand(shape, type);
- auto rhs = graph.addOperand(shape, type);
- auto res = graph.addOperand(shape, type);
-
- addAddOperation(graph, {lhs, rhs}, {res});
-
- // Set model inputs/outputs
- graph.addInput(lhs);
- graph.addInput(rhs);
- graph.addOutput(res);
-
- graph.verify();
-
- SUCCEED();
-}
-
-TEST(Graph, neg_InvalidGraph_BadInput)
-{
- Graph graph;
-
- // Add tensors
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::FLOAT32};
- auto in = graph.addOperand(shape, type);
- auto out = graph.addOperand(shape, type);
-
- // Set model inputs/outputs
- graph.addInput(in);
- graph.addOutput(out);
- graph.addInput(OperandIndex{89}); // Non-exisiting operand!
-
- EXPECT_ANY_THROW(graph.verify());
-}
-
-TEST(Graph, neg_InvalidGraph_BadOutput)
-{
- Graph graph;
-
- // Add tensors
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::FLOAT32};
- auto in = graph.addOperand(shape, type);
- auto out = graph.addOperand(shape, type);
-
- // Set model inputs/outputs
- graph.addInput(in);
- graph.addOutput(out);
- graph.addOutput(OperandIndex{12}); // Non-exisiting operand!
-
- EXPECT_ANY_THROW(graph.verify());
-}
-
-TEST(Graph, neg_InvalidAddOperation_BadInputIndex)
-{
- Graph graph;
-
- // Add tensors
- Shape shape{1, 2, 2, 1};
- TypeInfo type{DataType::FLOAT32};
- auto lhs = graph.addOperand(shape, type);
- auto rhs = graph.addOperand(shape, type);
- auto res = graph.addOperand(shape, type);
-
- // Set model inputs/outputs
- graph.addInput(lhs);
- graph.addInput(rhs);
- graph.addOutput(res);
-
- ASSERT_FALSE(addAddOperation(graph, {lhs, OperandIndex{99}}, {res}).valid());
-}
diff --git a/runtime/onert/test/core/ir/LayoutSet.cc b/runtime/onert/test/core/ir/LayoutSet.cc
deleted file mode 100644
index 591710a4d..000000000
--- a/runtime/onert/test/core/ir/LayoutSet.cc
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/LayoutSet.h"
-
-using onert::ir::Layout;
-using onert::ir::LayoutSet;
-
-TEST(ir_LayoutSet, neg_add_remove)
-{
- LayoutSet set{Layout::NCHW};
- set.remove(Layout::NHWC);
- ASSERT_EQ(set.size(), 1);
- set.add(Layout::NHWC);
- ASSERT_EQ(set.size(), 2);
- set.remove(Layout::NHWC);
- ASSERT_EQ(set.size(), 1);
- set.remove(Layout::NCHW);
- ASSERT_EQ(set.size(), 0);
- set.remove(Layout::NCHW);
- ASSERT_EQ(set.size(), 0);
-}
-
-TEST(ir_LayoutSet, neg_add_twice)
-{
- LayoutSet set;
- set.add(Layout::NHWC);
- ASSERT_EQ(set.size(), 1);
- set.add(Layout::NHWC);
- ASSERT_EQ(set.size(), 1);
-}
-
-TEST(ir_LayoutSet, set_operators)
-{
- LayoutSet set1{Layout::NCHW};
- LayoutSet set2{Layout::NHWC};
- LayoutSet set3 = set1 | set2;
-
- ASSERT_EQ(set3.size(), 2);
-
- ASSERT_EQ((set3 - set1).size(), 1);
- ASSERT_EQ((set3 - set1).contains(Layout::NHWC), true);
- ASSERT_EQ((set3 - set2).size(), 1);
- ASSERT_EQ((set3 - set2).contains(Layout::NCHW), true);
- ASSERT_EQ((set3 - set3).size(), 0);
-
- ASSERT_EQ((set3 & set1).size(), 1);
- ASSERT_EQ((set3 & set1).contains(Layout::NCHW), true);
- ASSERT_EQ((set3 & set2).size(), 1);
- ASSERT_EQ((set3 & set2).contains(Layout::NHWC), true);
- ASSERT_EQ((set1 & set2).size(), 0);
-}
diff --git a/runtime/onert/test/core/ir/OperandIndexSet.cc b/runtime/onert/test/core/ir/OperandIndexSet.cc
deleted file mode 100644
index c363e5472..000000000
--- a/runtime/onert/test/core/ir/OperandIndexSet.cc
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/OperandIndexSequence.h"
-
-using onert::ir::OperandIndex;
-using onert::ir::OperandIndexSequence;
-
-TEST(ir_OperandIndexSequence, neg_append)
-{
- OperandIndexSequence iset{0, 2, 4, 8};
-
- ASSERT_EQ(iset.size(), 4);
-
- iset.append(OperandIndex{10});
-
- ASSERT_EQ(iset.size(), 5);
-
- onert::ir::IOIndex index1{1};
- onert::ir::IOIndex index2{4};
-
- ASSERT_EQ(iset.at(index1), 2);
- ASSERT_EQ(iset.at(index2), 10);
-
- ASSERT_TRUE(iset.contains(OperandIndex{2}));
- ASSERT_TRUE(iset.contains(OperandIndex{10}));
- ASSERT_FALSE(iset.contains(OperandIndex{11}));
-}
-
-TEST(graph_OperandIndexSequence, neg_replace)
-{
- OperandIndexSequence iset{0, 1, 2, 3};
-
- iset.replace(OperandIndex{1}, OperandIndex{9});
- ASSERT_FALSE(iset.contains(OperandIndex{1}));
- ASSERT_TRUE(iset.contains(OperandIndex{9}));
-}
diff --git a/runtime/onert/test/core/ir/OperandSet.cc b/runtime/onert/test/core/ir/OperandSet.cc
deleted file mode 100644
index 6cf9c8842..000000000
--- a/runtime/onert/test/core/ir/OperandSet.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Operands.h"
-
-TEST(ir_Operands, neg_set_test)
-{
- onert::ir::Operands set;
-
- onert::ir::Shape shape0{1, 2, 3};
-
- onert::ir::Shape shape1(4);
- shape1.dim(0) = 10;
- shape1.dim(1) = 20;
- shape1.dim(2) = 30;
- shape1.dim(3) = 40;
-
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- set.emplace(shape0, type);
- set.emplace(shape1, type);
-
- ASSERT_EQ(set.exist(onert::ir::OperandIndex{0u}), true);
- ASSERT_EQ(set.exist(onert::ir::OperandIndex{1u}), true);
- ASSERT_EQ(set.exist(onert::ir::OperandIndex{2u}), false);
-
- ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(0), 1);
- ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(1), 2);
- ASSERT_EQ(set.at(onert::ir::OperandIndex{0u}).shape().dim(2), 3);
-}
diff --git a/runtime/onert/test/core/ir/OperationSet.cc b/runtime/onert/test/core/ir/OperationSet.cc
deleted file mode 100644
index 4a17eeb33..000000000
--- a/runtime/onert/test/core/ir/OperationSet.cc
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "MockNode.h"
-#include "ir/Operations.h"
-
-using onert::ir::Operation;
-using onert::ir::OperationIndex;
-using onert::ir::Operations;
-
-TEST(ir_Operations, basic)
-{
- Operations ops;
- ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7})));
- OperationIndex idx{0u};
- ASSERT_EQ(ops.at(idx).getInputs().size(), 4);
- ASSERT_EQ(ops.at(idx).getOutputs().size(), 3);
-}
-
-TEST(ir_Operations, neg_at)
-{
- Operations ops;
- ops.push(std::unique_ptr<Operation>(new onert_test::ir::SimpleMock({1, 2, 3, 4}, {5, 6, 7})));
- OperationIndex idx{99u};
- EXPECT_THROW(ops.at(idx), std::out_of_range);
-}
diff --git a/runtime/onert/test/core/ir/SetIO.cc b/runtime/onert/test/core/ir/SetIO.cc
deleted file mode 100644
index 68b477347..000000000
--- a/runtime/onert/test/core/ir/SetIO.cc
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Graph.h"
-#include "ir/Index.h"
-#include "ir/OperandIndexSequence.h"
-#include "ir/operation/Conv2D.h"
-#include "ir/operation/Concat.h"
-
-#include <memory>
-
-#include <stdexcept>
-
-using Index = onert::ir::IOIndex;
-using IndexSet = onert::ir::OperandIndexSequence;
-
-TEST(ir_Operation_setIO, operation_setIO_conv)
-{
- onert::ir::Graph graph;
-
- onert::ir::Shape shape{3};
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- // Add Conv
- using Graph = onert::ir::operation::Conv2D;
-
- auto input_operand = graph.addOperand(shape, type);
- auto kernel_operand = graph.addOperand(shape, type);
- auto bias_operand = graph.addOperand(shape, type);
- IndexSet inputs{input_operand, kernel_operand, bias_operand};
-
- Graph::Param conv_params;
- conv_params.padding.type = onert::ir::PaddingType::SAME;
- conv_params.stride.horizontal = 1;
- conv_params.stride.vertical = 1;
- conv_params.activation = onert::ir::Activation::NONE;
-
- auto output_operand = graph.addOperand(shape, type).value();
- IndexSet outputs{output_operand};
-
- auto conv = std::make_unique<Graph>(inputs, outputs, conv_params);
-
- ASSERT_NE(conv, nullptr);
- ASSERT_EQ(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
- conv->setInputs({8, 9, 10});
- ASSERT_NE(conv->getInputs().at(Index{0}).value(), inputs.at(0).value());
- ASSERT_EQ(conv->getInputs().at(Index{0}).value(), 8);
-}
-
-TEST(ir_Operation_setIO, neg_operation_setIO_concat)
-{
- onert::ir::Graph graph;
-
- onert::ir::Shape shape{3};
-
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- using Graph = onert::ir::operation::Concat;
-
- // Add Concat
- IndexSet inputs;
- for (int i = 0; i < 6; ++i)
- {
- inputs.append(graph.addOperand(shape, type));
- }
-
- Graph::Param concat_params{0};
-
- auto output_operand = graph.addOperand(shape, type).value();
- IndexSet outputs{output_operand};
-
- auto concat = std::make_unique<Graph>(inputs, outputs, concat_params);
-
- ASSERT_NE(concat, nullptr);
- ASSERT_EQ(concat->getInputs().size(), 6);
- ASSERT_EQ(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
-
- concat->setInputs({80, 6, 9, 11});
- ASSERT_EQ(concat->getInputs().size(), 4);
- ASSERT_NE(concat->getInputs().at(Index{0}).value(), inputs.at(0).value());
- ASSERT_EQ(concat->getInputs().at(Index{0}).value(), 80);
- ASSERT_EQ(concat->getInputs().at(Index{2}).value(), 9);
- ASSERT_THROW(concat->getInputs().at(Index{5}), std::out_of_range);
-}
diff --git a/runtime/onert/test/core/ir/Shape.cc b/runtime/onert/test/core/ir/Shape.cc
deleted file mode 100644
index c24aeda8d..000000000
--- a/runtime/onert/test/core/ir/Shape.cc
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <ir/Shape.h>
-
-#include <gtest/gtest.h>
-
-TEST(ShapeTest, basic_test)
-{
- {
- onert::ir::Shape shape(3);
-
- shape.dim(0) = 1;
- shape.dim(1) = 2;
- shape.dim(2) = 3;
-
- ASSERT_EQ(shape.rank(), 3);
- ASSERT_EQ(shape.num_elements(), 6);
- ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
- ASSERT_EQ(shape.hasUnspecifiedDims(), false);
- }
- {
- onert::ir::Shape shape; // scalar or rank is unspecified
-
- ASSERT_EQ(shape.rank(), 0);
- ASSERT_EQ(shape.num_elements(), 1);
- ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), true);
- ASSERT_EQ(shape.hasUnspecifiedDims(), false);
- }
-}
-
-TEST(ShapeTest, neg_basic_test)
-{
- {
- onert::ir::Shape shape(2);
-
- shape.dim(0) = 1;
- shape.dim(1) = onert::ir::Shape::UNSPECIFIED_DIM;
-
- ASSERT_EQ(shape.rank(), 2);
- ASSERT_EQ(onert::ir::rankMaybeUnspecified(shape), false);
- ASSERT_EQ(shape.hasUnspecifiedDims(), true);
- EXPECT_ANY_THROW(shape.num_elements());
- }
-}
diff --git a/runtime/onert/test/core/ir/UseDef.cc b/runtime/onert/test/core/ir/UseDef.cc
deleted file mode 100644
index 47c98f939..000000000
--- a/runtime/onert/test/core/ir/UseDef.cc
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Graph.h"
-#include "ir/verifier/Verifier.h"
-#include <memory>
-#include "MockNode.h"
-
-#include <typeindex>
-
-namespace
-{
-
-using IndexSet = onert::ir::OperandIndexSequence;
-using Mock = onert_test::ir::SimpleMock;
-
-} // namespace
-
-TEST(ir_Operand, neg_usedef)
-{
- onert::ir::Graph graph;
- onert::ir::verifier::DAGChecker verifier;
-
- onert::ir::Shape shape(3);
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- // Model Input/Output
- auto input_operand = graph.addOperand(shape, type);
- auto output_operand = graph.addOperand(shape, type);
-
- graph.addInput(input_operand);
- graph.addOutput(output_operand);
-
- // MockNode1
- auto operand_index1 = graph.addOperand(shape, type);
- auto mocknode_index1 =
- graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index1}));
-
- // MockNode2
- auto operand_index2 = graph.addOperand(shape, type);
- auto mocknode_index2 =
- graph.addOperation(std::make_unique<Mock>(IndexSet{input_operand}, IndexSet{operand_index2}));
-
- // MockNode3(two input)
- auto multiinput_index = graph.addOperation(
- std::make_unique<Mock>(IndexSet{operand_index1, operand_index2}, IndexSet{output_operand}));
-
- graph.verify();
-
- ASSERT_TRUE(verifier.verify(graph));
-
- // Check def
- ASSERT_EQ(graph.operands().at(operand_index1).getDef(), mocknode_index1);
- ASSERT_EQ(graph.operands().at(operand_index2).getDef(), mocknode_index2);
- ASSERT_EQ(graph.operands().at(output_operand).getDef(), multiinput_index);
-
- ASSERT_NE(graph.operands().at(operand_index1).getDef(), mocknode_index2);
- ASSERT_NE(graph.operands().at(operand_index1).getDef(), multiinput_index);
-
- // Check use
- ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index1), true);
- ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(mocknode_index2), true);
- ASSERT_EQ(graph.operands().at(input_operand).getUses().contains(multiinput_index), false);
- ASSERT_EQ(graph.operands().at(operand_index1).getUses().contains(multiinput_index), true);
- ASSERT_EQ(graph.operands().at(operand_index2).getUses().contains(multiinput_index), true);
-
- ASSERT_EQ(graph.operands().at(input_operand).getUses().size(), 2);
- ASSERT_EQ(graph.operands().at(operand_index1).getUses().size(), 1);
- ASSERT_EQ(graph.operands().at(output_operand).getUses().size(), 0);
-}
diff --git a/runtime/onert/test/core/ir/Verifier.cc b/runtime/onert/test/core/ir/Verifier.cc
deleted file mode 100644
index b4be2d9cd..000000000
--- a/runtime/onert/test/core/ir/Verifier.cc
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Operation.h"
-#include "ir/Graph.h"
-#include "ir/verifier/Verifier.h"
-#include <memory>
-#include "ir/Operand.h"
-#include "MockNode.h"
-
-using IndexSet = onert::ir::OperandIndexSequence;
-using Mock = onert_test::ir::SimpleMock;
-
-TEST(Verifier, dag_checker)
-{
- onert::ir::Graph graph;
-
- onert::ir::Shape shape{3};
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- auto operand1 = graph.addOperand(shape, type);
- auto operand2 = graph.addOperand(shape, type);
-
- graph.addInput(operand1);
- graph.addOutput(operand2);
-
- graph.addOperation(std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2}));
-
- onert::ir::verifier::DAGChecker verifier;
-
- ASSERT_TRUE(verifier.verify(graph));
-}
-
-TEST(Verifier, neg_edge_consistency_checker_1)
-{
- onert::ir::Graph graph;
-
- onert::ir::Shape shape{3};
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- auto operand1 = graph.addOperand(shape, type);
- auto operand2 = graph.addOperand(shape, type);
-
- graph.addInput(operand1);
- graph.addOutput(operand2);
-
- auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2});
- auto op_ind = graph.addOperation(std::move(mock_op));
-
- graph.operands().at(operand1).removeUse(op_ind); // Manipulate the operand alone
-
- onert::ir::verifier::EdgeChecker verifier;
- ASSERT_FALSE(verifier.verify(graph));
-}
-
-TEST(Verifier, neg_edge_consistency_checker_2)
-{
- onert::ir::Graph graph;
-
- onert::ir::Shape shape{3};
- onert::ir::TypeInfo type{onert::ir::DataType::INT32};
-
- auto operand1 = graph.addOperand(shape, type);
- auto operand2 = graph.addOperand(shape, type);
-
- graph.addInput(operand1);
- graph.addOutput(operand2);
-
- auto mock_op = std::make_unique<Mock>(IndexSet{operand1}, IndexSet{operand2});
- auto mock_op_ptr = mock_op.get();
- auto op_ind = graph.addOperation(std::move(mock_op));
-
- mock_op_ptr->setInputs({operand2}); // Manipulate the operation alone
-
- onert::ir::verifier::EdgeChecker verifier;
- ASSERT_FALSE(verifier.verify(graph));
-}
diff --git a/runtime/onert/test/core/util/Index.cc b/runtime/onert/test/core/util/Index.cc
deleted file mode 100644
index 2d110e326..000000000
--- a/runtime/onert/test/core/util/Index.cc
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "util/Index.h"
-
-using Index = ::onert::util::Index<uint32_t, struct TestTag>;
-
-TEST(Index, neg_index_test)
-{
- Index idx1{1u};
- Index idx2{2u};
- Index idx3{idx1};
-
- ASSERT_EQ(idx1, 1);
- ASSERT_EQ(idx1, 1u);
- ASSERT_EQ(idx1.value(), 1u);
- ASSERT_NE(idx1, idx2);
- ASSERT_EQ(idx1, idx3);
-}
diff --git a/runtime/onert/test/core/util/ObjectManager.cc b/runtime/onert/test/core/util/ObjectManager.cc
deleted file mode 100644
index 78f044e56..000000000
--- a/runtime/onert/test/core/util/ObjectManager.cc
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "util/ObjectManager.h"
-#include "util/Index.h"
-
-using namespace onert;
-
-struct TestTag;
-using Index = typename util::Index<uint32_t, TestTag>;
-
-TEST(ObjectManager, emplace)
-{
- util::ObjectManager<Index, int> man;
-
- auto index = man.emplace(100);
- ASSERT_EQ(man.at(index), 100);
-}
-
-TEST(ObjectManager, neg_remove_1)
-{
- util::ObjectManager<Index, int> man;
-
- Index index = man.emplace(100);
- ASSERT_TRUE(man.exist(index));
- ASSERT_EQ(man.at(index), 100);
-
- man.remove(index);
- ASSERT_FALSE(man.exist(index));
-}
-
-TEST(ObjectManager, neg_remove_2)
-{
- util::ObjectManager<Index, int> man;
-
- auto index0 = man.emplace(100);
- auto index1 = man.emplace(200);
- ASSERT_TRUE(man.exist(index0));
- ASSERT_EQ(man.at(index0), 100);
- ASSERT_TRUE(man.exist(index1));
- ASSERT_EQ(man.at(index1), 200);
-
- man.remove(index0);
- ASSERT_FALSE(man.exist(index0));
- ASSERT_TRUE(man.exist(index1));
- ASSERT_EQ(man.at(index1), 200);
-}
-
-TEST(ObjectManager, push)
-{
- util::ObjectManager<Index, int> man;
-
- // Not specify index
- auto index = man.push(std::make_unique<int>(100));
- ASSERT_EQ(man.at(index), 100);
-
- // Specify index
- auto index2 = man.push(std::make_unique<int>(200), Index{33});
- ASSERT_EQ(index2.value(), 33);
- ASSERT_EQ(man.at(index2), 200);
-
- auto index3 = man.push(std::make_unique<int>(300));
- // NOTE auto-generated index number is always (biggest index in the ObjectManager + 1)
- ASSERT_EQ(index3.value(), 34);
- ASSERT_EQ(man.at(index3), 300);
-
- auto index4 = man.push(std::make_unique<int>(400), Index{22});
- ASSERT_EQ(index4.value(), 22);
- ASSERT_EQ(man.at(index4), 400);
-
- auto index5 = man.push(std::make_unique<int>(500));
- // NOTE auto-generated index number is always (biggest index in the ObjectManager + 1)
- ASSERT_EQ(index5.value(), 35);
- ASSERT_EQ(man.at(index5), 500);
-}
-
-TEST(ObjectManager, neg_push)
-{
- util::ObjectManager<Index, int> man;
-
- // Specify index
- auto index = man.push(std::make_unique<int>(100), Index{55});
- ASSERT_EQ(index.value(), 55);
- ASSERT_EQ(man.at(index), 100);
-
- // Specify the same index
- auto index2 = man.push(std::make_unique<int>(200), Index{55});
- ASSERT_FALSE(index2.valid());
-}
-
-static const uint32_t kMaxUInt32 = std::numeric_limits<uint32_t>::max();
-
-TEST(ObjectManager, neg_push_undefined_index)
-{
- util::ObjectManager<Index, int> man;
-
- // Try inserting invalid(undefined) index
- auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32});
- ASSERT_FALSE(index.valid());
- ASSERT_EQ(man.size(), 0);
-}
-
-TEST(ObjectManager, neg_push_max_index)
-{
- util::ObjectManager<Index, int> man;
-
- // Insert an object with maximum valid index
- auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32 - 1});
- ASSERT_EQ(index.value(), kMaxUInt32 - 1);
- ASSERT_EQ(man.at(index), 100);
- ASSERT_EQ(man.size(), 1);
-
- // Reached to the final index so next push/emplace must fail
- auto index2 = man.push(std::make_unique<int>(200));
- ASSERT_EQ(man.size(), 1);
- ASSERT_FALSE(index2.valid());
-}
-
-TEST(ObjectManager, neg_emplace_max_index)
-{
- util::ObjectManager<Index, int> man;
-
- // Insert an object with maximum valid index
- auto index = man.push(std::make_unique<int>(100), Index{kMaxUInt32 - 1});
- ASSERT_EQ(index.value(), kMaxUInt32 - 1);
- ASSERT_EQ(man.at(index), 100);
- ASSERT_EQ(man.size(), 1);
-
- // Reached to the final index so next push/emplace must fail
- auto index3 = man.emplace(200);
- ASSERT_EQ(man.size(), 1);
- ASSERT_FALSE(index3.valid());
-}
-
-TEST(ObjectManager, const_iterate)
-{
- util::ObjectManager<Index, int> man;
-
- auto index0 = man.emplace(100);
- auto index1 = man.emplace(200);
- auto index2 = man.emplace(300);
-
- int sum = 0;
- man.iterate([&](const Index &index, const int &val) { sum += val; });
- ASSERT_EQ(sum, 600);
-}
-
-TEST(ObjectManager, non_const_iterate)
-{
- util::ObjectManager<Index, int> man;
-
- auto index0 = man.emplace(100);
- auto index1 = man.emplace(200);
- auto index2 = man.emplace(300);
-
- man.iterate([&](const Index &index, int &val) { val += 1; });
- ASSERT_EQ(man.at(index0), 101);
- ASSERT_EQ(man.at(index1), 201);
- ASSERT_EQ(man.at(index2), 301);
-}
-
-TEST(ObjectManager, set)
-{
- util::ObjectManager<Index, int> man;
- auto index = man.set(Index{1}, std::make_unique<int>(100)); // Insert
- ASSERT_EQ(index, Index{1});
- auto index2 = man.set(index, std::make_unique<int>(200)); // Overwrite
- ASSERT_EQ(index2, index);
- ASSERT_EQ(man.at(index2), 200);
-}
-
-TEST(ObjectManager, neg_set)
-{
- auto v = std::make_unique<int>(100);
- util::ObjectManager<Index, int> man;
- auto index = man.set(Index{}, std::move(v)); // Try set with an invalid index
- ASSERT_EQ(index, Index{});
- ASSERT_FALSE(index.valid());
- ASSERT_NE(v, nullptr); // v must be kept when failure
-}
-
-TEST(ObjectManager, getRawPtr)
-{
- auto v = std::make_unique<int>(100);
- auto v_ptr = v.get();
- util::ObjectManager<Index, int> man;
- auto index = man.push(std::move(v));
- ASSERT_EQ(v_ptr, man.getRawPtr(index));
-}
-
-TEST(ObjectManager, neg_getRawPtr)
-{
- util::ObjectManager<Index, int> man;
- auto ptr = man.getRawPtr(Index{1});
- ASSERT_EQ(ptr, nullptr);
-}
diff --git a/runtime/onert/test/core/util/ShapeInference.cc b/runtime/onert/test/core/util/ShapeInference.cc
deleted file mode 100644
index 2ecaa2885..000000000
--- a/runtime/onert/test/core/util/ShapeInference.cc
+++ /dev/null
@@ -1,545 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <gtest/gtest.h>
-
-#include "ir/Layout.h"
-#include "util/ShapeInference.h"
-
-using namespace onert::ir;
-
-TEST(ShapeInference, Elementwise)
-{
- Shape lhs_shape{1, 299, 299, 3};
- Shape rhs_shape{3};
- auto infered_out_shape = onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.dim(0), 1);
- ASSERT_EQ(infered_out_shape.dim(1), 299);
- ASSERT_EQ(infered_out_shape.dim(2), 299);
- ASSERT_EQ(infered_out_shape.dim(3), 3);
-}
-
-TEST(ShapeInference, neg_Elementwise)
-{
- Shape lhs_shape{1, 299, 299, 3};
- Shape rhs_shape{5, 3};
- ASSERT_THROW(onert::shape_inference::inferEltwiseShape(lhs_shape, rhs_shape), std::runtime_error);
-}
-
-TEST(ShapeInference, Pool2DNodeSame)
-{
- Shape in_shape{10, 6, 12, 20};
- Stride stride{3, 7};
- Padding padding{PaddingType::SAME};
-
- operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-
- operation::Pool2D::Param max_pool_param{
- operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-}
-
-TEST(ShapeInference, Pool2DNodeValid)
-{
- Shape in_shape{10, 6, 12, 20};
- Stride stride{3, 7};
- Padding padding{PaddingType::VALID};
-
- operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-
- operation::Pool2D::Param max_pool_param{
- operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-}
-
-TEST(ShapeInference, Pool2DNodeExplicit)
-{
- Shape in_shape{10, 3, 5, 20};
-
- Stride stride{3, 7};
- Padding padding{4, 3, 2, 1};
-
- operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
- auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-
- operation::Pool2D::Param max_pool_param{
- operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
- infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);
-}
-
-TEST(ShapeInference, neg_Pool2DNode_InvalidStride)
-{
- Shape in_shape{10, 6, 12, 20};
- Stride stride{0, 7};
- Padding padding{PaddingType::SAME};
-
- operation::Pool2D::Param avg_pool_param{
- operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
- ASSERT_THROW(onert::shape_inference::inferPoolShape(in_shape, avg_pool_param),
- std::runtime_error);
-}
-
-TEST(ShapeInference, Conv2D)
-{
- Shape in_shape{10, 6, 12, 20};
- Shape ker_shape{30, 3, 6, 20};
-
- operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE,
- Dilation{1, 1}};
- auto infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
-
- param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE,
- Dilation{1, 1}};
- infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
-
- param =
- operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
- infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);
-}
-
-TEST(ShapeInference, neg_Conv2D_InvalidStride)
-{
- Shape in_shape{10, 6, 12, 20};
- Shape ker_shape{30, 3, 6, 20};
-
- operation::Conv2D::Param param{Stride{0, 0}, Padding{PaddingType::VALID}, Activation::NONE,
- Dilation{1, 1}};
- ASSERT_THROW(onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param),
- std::runtime_error);
-}
-
-TEST(ShapeInference, DepthwiseConv2D)
-{
- Shape in_shape{10, 6, 12, 20};
- Shape ker_shape{1, 3, 6, 60};
-
- operation::DepthwiseConv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, 3,
- Activation::NONE, Dilation{1, 1}};
- auto infered_out_shape =
- onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
-
- param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, 3,
- Activation::NONE, Dilation{1, 1}};
- infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
-
- param = operation::DepthwiseConv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, 3, Activation::NONE,
- Dilation{1, 1}};
- infered_out_shape = onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 4);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).H, 3);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
- ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 60);
-}
-
-TEST(ShapeInference, neg_DepthwiseConv2D_InvalidSride)
-{
- Shape in_shape{10, 6, 12, 20};
- Shape ker_shape{1, 3, 6, 60};
-
- operation::DepthwiseConv2D::Param param{Stride{3, 0}, Padding{PaddingType::VALID}, 3,
- Activation::NONE, Dilation{1, 1}};
- ASSERT_THROW(onert::shape_inference::inferDepthwiseConv2DShape(in_shape, ker_shape, param),
- std::runtime_error);
-}
-
-TEST(ShapeInference, Concat)
-{
- {
- Shape in1{10, 20, 30, 3, 50};
- Shape in2{10, 20, 30, 2, 50};
- Shape in3{10, 20, 30, 2, 50};
-
- operation::Concat::Param param{3};
- auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2, in3}, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 5);
- ASSERT_EQ(infered_out_shape.dim(0), 10);
- ASSERT_EQ(infered_out_shape.dim(1), 20);
- ASSERT_EQ(infered_out_shape.dim(2), 30);
- ASSERT_EQ(infered_out_shape.dim(3), 7);
- ASSERT_EQ(infered_out_shape.dim(4), 50);
- }
- {
- // case 1. when axis < 0
- Shape in1{10, 20, 2};
- Shape in2{10, 20, 3};
-
- operation::Concat::Param param{-1};
- auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 3);
- ASSERT_EQ(infered_out_shape.dim(0), 10);
- ASSERT_EQ(infered_out_shape.dim(1), 20);
- ASSERT_EQ(infered_out_shape.dim(2), 5);
- }
- {
- // case 2. when axis < 0
- Shape in1{2, 20, 2};
- Shape in2{3, 20, 2};
-
- operation::Concat::Param param{-3};
- auto infered_out_shape = onert::shape_inference::inferConcatShape({in1, in2}, param);
-
- ASSERT_EQ(infered_out_shape.rank(), 3);
- ASSERT_EQ(infered_out_shape.dim(0), 5);
- ASSERT_EQ(infered_out_shape.dim(1), 20);
- ASSERT_EQ(infered_out_shape.dim(2), 2);
- }
-}
-
-TEST(ShapeInference, neg_Concat)
-{
- {
- operation::Concat::Param param{2};
- Shape in1{10, 1, 3};
- Shape in2{10, 2, 4}; // dim[1] should be 1 but 2
-
- EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param));
- }
- { // wrong rank
- operation::Concat::Param param{2};
- Shape in1{10, 2, 3, 4};
- Shape in2{10, 2, 4}; // rank should be 4
-
- EXPECT_ANY_THROW(onert::shape_inference::inferConcatShape({in1, in2}, param));
- }
-}
-
-TEST(ShapeInference, ExpandDims)
-{
- Shape in_shape{30, 40};
-
- auto check = [&](int32_t axis, Shape &expected) {
- auto actual = onert::shape_inference::inferExpandDimsShape(in_shape, axis);
-
- ASSERT_EQ(actual.rank(), 3);
- for (int32_t dim = 0; dim < expected.rank(); dim++)
- ASSERT_EQ(actual.dim(dim), expected.dim(dim));
- };
-
- { // boundary
- int32_t axis = 0;
- Shape expected{1, 30, 40};
- check(axis, expected);
- }
- { // boundary
- int32_t axis = 2;
- Shape expected{30, 40, 1};
- check(axis, expected);
- }
- { // inside
- int32_t axis = 1;
- Shape expected{30, 1, 40};
- check(axis, expected);
- }
- { // negative boundary
- int32_t axis = -1;
- Shape expected{30, 40, 1};
- check(axis, expected);
- }
- { // negative boundary
- int32_t axis = -3;
- Shape expected{1, 30, 40};
- check(axis, expected);
- }
-}
-
-TEST(ShapeInference, neg_ExpandDims)
-{
- Shape in_shape{30, 40};
-
- { // over boundary
- int32_t axis = 3;
- ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error);
- }
- { // over boundary
- int32_t axis = -4;
- ASSERT_THROW(onert::shape_inference::inferExpandDimsShape(in_shape, axis), std::runtime_error);
- }
-}
-
-TEST(ShapeInference, FullyConnected)
-{
- Shape in_shape{3, 4, 5, 6};
- Shape ker_shape{3, 10};
- auto infered_out_shape = onert::shape_inference::inferFullyConnectedShape(in_shape, ker_shape);
-
- ASSERT_EQ(infered_out_shape.rank(), 2);
- ASSERT_EQ(infered_out_shape.dim(0), 36);
- ASSERT_EQ(infered_out_shape.dim(1), 3);
-}
-
-TEST(ShapeInference, Transpose)
-{
- auto check = [&](Shape &in_shape, std::vector<int> perm, Shape &expected) {
- // pre-conditions
- ASSERT_EQ(in_shape.rank(), perm.size());
- ASSERT_EQ(expected.rank(), perm.size());
- auto inferred_out_shape =
- onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size());
- // post-conditions
- ASSERT_EQ(inferred_out_shape.rank(), perm.size());
- for (int32_t dim = 0; dim < expected.rank(); dim++)
- {
- ASSERT_EQ(inferred_out_shape.dim(dim), expected.dim(dim));
- }
- };
- // check for 2-D
- {
- Shape in_shape{2, 3};
- std::vector<int> perm = {1, 0};
- Shape expected{3, 2};
- // int32_t rank = 2;
- check(in_shape, perm, expected);
- }
- // check for 3-D
- {
- Shape in_shape{1, 2, 3};
- std::vector<int> perm = {2, 0, 1};
- Shape expected{3, 1, 2};
- // int32_t rank = 3;
- check(in_shape, perm, expected);
- }
- // check for 4-D
- {
- Shape in_shape{1, 2, 3, 4};
- std::vector<int> perm = {1, 3, 0, 2};
- Shape expected{2, 4, 1, 3};
- // int32_t rank = 4;
- check(in_shape, perm, expected);
- }
-}
-
-TEST(ShapeInference, neg_Transpose)
-{
- Shape in_shape{1, 2, 3};
- // Invalid parameter size
- {
- std::vector<int> perm = {2, 0, 1, 0};
- // int32_t rank = 3;
- ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()),
- std::runtime_error);
- }
- // Invalid parameter value
- {
- std::vector<int> perm = {2, 0, 3};
- // int32_t rank = 3;
- ASSERT_THROW(onert::shape_inference::inferTransposeShape(in_shape, perm.data(), perm.size()),
- std::runtime_error);
- }
-}
-
-TEST(ShapeInference, Gather)
-{
- auto check = [&](Shape &input, Shape &indices, Shape &expected, int32_t axis) {
- int rank = input.rank();
- auto actual = onert::shape_inference::inferGatherShape(input, indices, axis, rank);
-
- ASSERT_EQ(actual.rank(), expected.rank());
-
- for (int32_t dim = 0; dim < expected.rank(); dim++)
- ASSERT_EQ(actual.dim(dim), expected.dim(dim));
- };
-
- // check for 2-D, 3-D, axis 0
- {
- Shape input{3, 4};
- Shape indices{1, 1, 2};
- int32_t axis = 0;
- Shape expected{1, 1, 2, 4};
- check(input, indices, expected, axis);
- }
-
- // check for 2-D, 3-D, axis 1
- {
- Shape input{3, 4};
- Shape indices{1, 2, 1};
- int32_t axis = 1;
- Shape expected{3, 1, 2, 1};
- check(input, indices, expected, axis);
- }
-
- // check for 3-D, 2-D, axis 0
- {
- Shape input{2, 3, 4};
- Shape indices{1, 2};
- int32_t axis = 0;
- Shape expected{1, 2, 3, 4};
- check(input, indices, expected, axis);
- }
-
- // check for 3-D, 2-D, axis 2
- {
- Shape input{2, 3, 4};
- Shape indices{2, 1};
- int32_t axis = 2;
- Shape expected{2, 3, 2, 1};
- check(input, indices, expected, axis);
- }
-
- // check for 4D, axis 0
- {
- Shape input{1, 2, 3, 4};
- Shape indices{2};
- int32_t axis = 0;
- Shape expected{2, 2, 3, 4};
- check(input, indices, expected, axis);
- }
-}
-
-TEST(ShapeInference, BCQFullyConnected)
-{
- auto check = [&](Shape &in_shape, Shape &cluster_shape, std::vector<int> cluster,
- Shape &expected) {
- auto actual =
- onert::shape_inference::inferBCQFullyConnectedShape(in_shape, cluster_shape, cluster.data());
- ASSERT_EQ(actual.rank(), expected.rank());
-
- for (int32_t dim = 0; dim < expected.rank(); dim++)
- ASSERT_EQ(actual.dim(dim), expected.dim(dim));
- };
-
- {
- Shape in_shape{10, 1};
- Shape cluster_shape{3, 2};
- std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
-
- Shape expected{30, 1};
- check(in_shape, cluster_shape, cluster, expected);
- }
-
- {
- Shape in_shape{1, 1};
- Shape cluster_shape{1, 2};
- std::vector<int> cluster = {3, 50};
-
- Shape expected{50, 1};
- check(in_shape, cluster_shape, cluster, expected);
- }
-}
-
-TEST(ShapeInference, BCQGather)
-{
- auto check = [&](Shape &indices_shape, Shape &cluster_shape, std::vector<int> cluster,
- uint32_t hidden_size, uint32_t axis, int rank, Shape &expected) {
- operation::BCQGather::Param param{hidden_size, axis};
- auto actual = onert::shape_inference::inferBCQGatherShape(indices_shape, cluster_shape,
- cluster.data(), rank, param);
- ASSERT_EQ(actual.rank(), expected.rank());
-
- for (int32_t dim = 0; dim < expected.rank(); dim++)
- ASSERT_EQ(actual.dim(dim), expected.dim(dim));
- };
-
- {
- Shape indices_shape{5, 1};
- Shape cluster_shape{3, 2};
- std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
- uint32_t hidden_size = 10;
- uint32_t axis = 0;
- int rank = 2;
-
- Shape expected{5, 1, 10};
- check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected);
- }
-
- {
- Shape indices_shape{5, 1};
- Shape cluster_shape{3, 2};
- std::vector<int> cluster = {1, 10, 2, 10, 3, 10};
- uint32_t hidden_size = 10;
- uint32_t axis = 1;
- int rank = 2;
-
- Shape expected{30, 5, 1};
- check(indices_shape, cluster_shape, cluster, hidden_size, axis, rank, expected);
- }
-}
diff --git a/runtime/service/CMakeLists.txt b/runtime/service/CMakeLists.txt
new file mode 100644
index 000000000..5ea6cdadd
--- /dev/null
+++ b/runtime/service/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectories()
diff --git a/runtime/service/npud/CMakeLists.txt b/runtime/service/npud/CMakeLists.txt
new file mode 100644
index 000000000..8cf51649c
--- /dev/null
+++ b/runtime/service/npud/CMakeLists.txt
@@ -0,0 +1,21 @@
+if(NOT BUILD_NPUD)
+ return()
+endif(NOT BUILD_NPUD)
+
+nnfw_find_package(GLib2.0 REQUIRED)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+add_executable(npud ${SOURCES})
+set_target_properties(npud PROPERTIES LINKER_LANGUAGE CXX)
+target_include_directories(npud PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+target_include_directories(npud PUBLIC ${GLIB2.0_INCLUDE_DIRS})
+target_link_libraries(npud PRIVATE nnfw_lib_misc)
+target_link_libraries(npud PRIVATE ${GLIB2.0_LIBRARIES})
+target_link_libraries(npud PRIVATE ${LIB_PTHREAD})
+
+if(ENVVAR_NPUD_CONFIG)
+ target_compile_definitions(npud PRIVATE ENVVAR_FOR_DEFAULT_CONFIG)
+endif(ENVVAR_NPUD_CONFIG)
+
+install(TARGETS npud DESTINATION bin)
diff --git a/runtime/service/npud/core/Server.cc b/runtime/service/npud/core/Server.cc
new file mode 100644
index 000000000..5b15388dc
--- /dev/null
+++ b/runtime/service/npud/core/Server.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Server.h"
+
+#include <thread>
+#include <util/Logging.h>
+
+namespace npud
+{
+namespace core
+{
+
+std::atomic_bool Server::_isRunning(false);
+
+Server::Server() noexcept
+ : _mainloop(g_main_loop_new(NULL, FALSE), g_main_loop_unref), _signal(std::make_unique<Signal>())
+{
+}
+
+void Server::run(void)
+{
+ VERBOSE(Server) << "Starting Server\n";
+
+ if (_isRunning.exchange(true))
+ {
+ throw std::runtime_error("Mainloop is already running.");
+ }
+
+ g_main_loop_run(_mainloop.get());
+}
+
+void Server::stop(void)
+{
+ VERBOSE(Server) << "Stop Server\n";
+
+ if (!_isRunning.load())
+ {
+ throw std::runtime_error("Mainloop is not running");
+ }
+
+ while (!g_main_loop_is_running(_mainloop.get()))
+ {
+ std::this_thread::yield();
+ }
+
+ g_main_loop_quit(_mainloop.get());
+ _isRunning = false;
+}
+
+} // namespace core
+} // namespace npud
diff --git a/runtime/service/npud/core/Server.h b/runtime/service/npud/core/Server.h
new file mode 100644
index 000000000..e2f37f8fe
--- /dev/null
+++ b/runtime/service/npud/core/Server.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_SERVER_H__
+#define __ONE_SERVICE_NPUD_CORE_SERVER_H__
+
+#include "Signal.h"
+
+#include <glib.h>
+#include <memory>
+#include <atomic>
+
+namespace npud
+{
+namespace core
+{
+
+class Server
+{
+public:
+ void run(void);
+ void stop(void);
+
+ static Server &instance(void)
+ {
+ static Server server;
+ return server;
+ }
+
+private:
+ Server() noexcept;
+
+ static std::atomic_bool _isRunning;
+
+ std::unique_ptr<GMainLoop, void (*)(GMainLoop *)> _mainloop;
+ std::unique_ptr<Signal> _signal;
+};
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_SERVER_H__
diff --git a/runtime/service/npud/core/Signal.cc b/runtime/service/npud/core/Signal.cc
new file mode 100644
index 000000000..085535a6a
--- /dev/null
+++ b/runtime/service/npud/core/Signal.cc
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Signal.h"
+
+#include "Server.h"
+#include <util/Logging.h>
+
+#include <csignal>
+
+namespace npud
+{
+namespace core
+{
+
+Signal::Signal(void) noexcept { init(); }
+
+void Signal::init(void)
+{
+ // NOTE Types of signals
+ // SIGTERM: termination request, sent to the program
+ // SIGSEGV: invalid memory access (segmentation fault)
+ // SIGINT: external interrupt, usually initiated by the user
+ // SIGILL: invalid program image, such as invalid instruction
+ // SIGABRT: abnormal termination condition, as is e.g. initiated by std::abort()
+ // SIGFPE: erroneous arithmetic operation such as divide by zero
+ // from https://en.cppreference.com/w/cpp/utility/program/SIG_types
+ std::signal(SIGTERM, handleSignal);
+ std::signal(SIGSEGV, handleSignal);
+ std::signal(SIGINT, handleSignal);
+ std::signal(SIGILL, handleSignal);
+ std::signal(SIGABRT, handleSignal);
+ std::signal(SIGFPE, handleSignal);
+}
+
+void Signal::handleSignal(int signum)
+{
+ VERBOSE(signal) << "Signal received: " << strsignal(signum) << "(" << signum << ")\n";
+ Server::instance().stop();
+}
+
+} // namespace core
+} // namespace npud
diff --git a/runtime/service/npud/core/Signal.h b/runtime/service/npud/core/Signal.h
new file mode 100644
index 000000000..ffddc7255
--- /dev/null
+++ b/runtime/service/npud/core/Signal.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_CORE_SIGNAL_H__
+#define __ONE_SERVICE_NPUD_CORE_SIGNAL_H__
+
+namespace npud
+{
+namespace core
+{
+
+class Signal
+{
+public:
+ Signal() noexcept;
+
+ void init(void);
+ static void handleSignal(int signum);
+};
+
+} // namespace core
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_CORE_SIGNAL_H__
diff --git a/runtime/service/npud/core/main.cc b/runtime/service/npud/core/main.cc
new file mode 100644
index 000000000..bd885b207
--- /dev/null
+++ b/runtime/service/npud/core/main.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Server.h"
+
+#include <util/Logging.h>
+
+using namespace npud;
+
+int main(int argc, const char *argv[])
+{
+ auto &server = core::Server::instance();
+
+ VERBOSE(main) << "Starting npud\n";
+ try
+ {
+ server.run();
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cerr << err.what() << std::endl;
+ return 1;
+ }
+
+ VERBOSE(main) << "Finished npud\n";
+ return 0;
+}
diff --git a/runtime/service/npud/util/Config.lst b/runtime/service/npud/util/Config.lst
new file mode 100644
index 000000000..d45b37352
--- /dev/null
+++ b/runtime/service/npud/util/Config.lst
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CONFIG
+#error Define CONFIG before including this file
+#endif
+
+// Name | Type | Default
+CONFIG(NPUD_LOG_ENABLE , bool , "0")
diff --git a/runtime/service/npud/util/ConfigSource.cc b/runtime/service/npud/util/ConfigSource.cc
new file mode 100644
index 000000000..7a14b0200
--- /dev/null
+++ b/runtime/service/npud/util/ConfigSource.cc
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConfigSource.h"
+
+#include <misc/EnvConfigSource.h>
+#include <misc/GeneralConfigSource.h>
+#include <misc/IConfigSource.h>
+
+#include <algorithm>
+#include <array>
+#include <cassert>
+#include <memory>
+
+namespace npud
+{
+namespace util
+{
+
+using namespace nnfw::misc;
+
+static std::unique_ptr<IConfigSource> _source;
+
+void config_source(std::unique_ptr<IConfigSource> &&source) { _source = std::move(source); }
+
+static IConfigSource *config_source()
+{
+ if (!_source)
+ {
+#ifdef ENVVAR_FOR_DEFAULT_CONFIG
+ // Default ConfigSource is EnvConfigSource
+ _source = std::make_unique<EnvConfigSource>();
+#else
+ _source = std::make_unique<GeneralConfigSource>();
+#endif // ENVVAR_FOR_DEFAULT_CONFIG
+ }
+ return _source.get();
+}
+
+static std::string getConfigOrDefault(const std::string &key)
+{
+ static std::unordered_map<std::string, std::string> defaults;
+ if (defaults.empty())
+ {
+#define CONFIG(Name, Type, Default) \
+ { \
+ auto name = std::string{#Name}; \
+ defaults.emplace(name, std::string{Default}); \
+ }
+
+#include "Config.lst"
+
+#undef CONFIG
+ }
+
+ // Treat empty string and absence of the value to be the same
+ auto ret = config_source()->get(key);
+ // if not found search from defaults
+ if (ret.empty())
+ {
+ auto itr = defaults.find(key);
+ if (itr != defaults.end())
+ {
+ // Return the default value if exists
+ ret = itr->second;
+ }
+ }
+
+ return ret;
+}
+
+bool toBool(const std::string &val)
+{
+ static const std::array<std::string, 5> false_list{"0", "OFF", "FALSE", "N", "NO"};
+ auto false_found = std::find(false_list.begin(), false_list.end(), val);
+ return false_found == false_list.end();
+}
+
+int toInt(const std::string &val) { return std::stoi(val); }
+
+bool getConfigBool(const std::string &key)
+{
+ auto raw = getConfigOrDefault(key);
+ return toBool(raw);
+}
+
+int getConfigInt(const std::string &key)
+{
+ auto raw = getConfigOrDefault(key);
+ return toInt(raw);
+}
+
+std::string getConfigString(const std::string &key) { return getConfigOrDefault(key); }
+
+} // namespace util
+} // namespace npud
+
+namespace npud
+{
+namespace util
+{
+namespace config
+{
+
+#define CONFIG(Name, Type, Default) const char *Name = #Name;
+
+#include "Config.lst"
+
+#undef CONFIG
+
+} // namespace config
+} // namespace util
+} // namespace npud
diff --git a/runtime/service/npud/util/ConfigSource.h b/runtime/service/npud/util/ConfigSource.h
new file mode 100644
index 000000000..f4ecc79a5
--- /dev/null
+++ b/runtime/service/npud/util/ConfigSource.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_UTIL_CONFIG_SOURCE_H__
+#define __ONE_SERVICE_NPUD_UTIL_CONFIG_SOURCE_H__
+
+#include <string>
+
+namespace npud
+{
+namespace util
+{
+
+bool getConfigBool(const std::string &key);
+int getConfigInt(const std::string &key);
+std::string getConfigString(const std::string &key);
+
+} // namespace util
+} // namespace npud
+
+namespace npud
+{
+namespace util
+{
+namespace config
+{
+
+#define CONFIG(Name, Type, Default) extern const char *Name;
+
+#include "Config.lst"
+
+#undef CONFIG
+
+} // namespace config
+} // namespace util
+} // namespace npud
+
+#endif // __ONE_SERVICE_NPUD_UTIL_CONFIG_SOURCE_H__
diff --git a/runtime/service/npud/util/Logging.h b/runtime/service/npud/util/Logging.h
new file mode 100644
index 000000000..0b75b3966
--- /dev/null
+++ b/runtime/service/npud/util/Logging.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONE_SERVICE_NPUD_UTIL_LOGGING_H__
+#define __ONE_SERVICE_NPUD_UTIL_LOGGING_H__
+
+#include <iostream>
+#include <cstring>
+
+#include "ConfigSource.h"
+
+namespace npud
+{
+namespace util
+{
+namespace logging
+{
+class Context
+{
+public:
+ Context() noexcept : _enabled{false}
+ {
+ const auto env = util::getConfigBool(util::config::NPUD_LOG_ENABLE);
+
+ if (env)
+ {
+ _enabled = true;
+ }
+ }
+
+ static Context &get() noexcept
+ {
+ static Context ctx;
+ return ctx;
+ }
+
+public:
+ bool enabled(void) const { return _enabled; }
+
+private:
+ bool _enabled;
+};
+
+static Context &ctx = Context::get();
+
+inline std::string decorated_name(const char *input)
+{
+ const int min_prefix = 16;
+ std::string prefix(input);
+ auto len_prefix = prefix.size();
+ if (len_prefix > min_prefix)
+ return "[" + prefix + "] ";
+ std::string spaces((min_prefix - len_prefix) / 2, ' ');
+ return (len_prefix % 2 ? "[ " : "[") + spaces + prefix + spaces + "] ";
+}
+} // namespace logging
+} // namespace util
+} // namespace npud
+
+#define VERBOSE(name) \
+ if (::npud::util::logging::ctx.enabled()) \
+ std::cout << ::npud::util::logging::decorated_name(#name)
+
+#define VERBOSE_F() \
+ if (::npud::util::logging::ctx.enabled()) \
+ std::cout << ::npud::util::logging::decorated_name(__func__)
+
+#define WHEN_LOG_ENABLED(METHOD) \
+ if (::npud::util::logging::ctx.enabled()) \
+ do \
+ { \
+ METHOD; \
+ } while (0)
+
+#endif // __ONE_SERVICE_NPUD_UTIL_LOGGING_H__
diff --git a/tests/nnapi/CMakeLists.txt b/tests/nnapi/CMakeLists.txt
index 67ac90f15..c1fa308a1 100644
--- a/tests/nnapi/CMakeLists.txt
+++ b/tests/nnapi/CMakeLists.txt
@@ -7,11 +7,6 @@ if (NOT BUILD_ONERT)
return()
endif(NOT BUILD_ONERT)
-# GCC Compiler under 6.2 is not support this test build
-if (CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.2)
- return()
-endif()
-
if (ANDROID_BOOST_ROOT)
set(BOOST_ROOT ${ANDROID_BOOST_ROOT})
endif (ANDROID_BOOST_ROOT)
diff --git a/tests/nnfw_api/src/CircleGen.cc b/tests/nnfw_api/src/CircleGen.cc
index e4e4ba1af..4f1c7f9f5 100644
--- a/tests/nnfw_api/src/CircleGen.cc
+++ b/tests/nnfw_api/src/CircleGen.cc
@@ -269,6 +269,20 @@ uint32_t CircleGen::addOperatorFloorDiv(const OperatorParams &params)
circle::BuiltinOptions_NONE, 0);
}
+uint32_t CircleGen::addOperatorGreater(const OperatorParams &params)
+{
+ auto options = circle::CreateLessOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_GREATER,
+ circle::BuiltinOptions_GreaterOptions, options);
+}
+
+uint32_t CircleGen::addOperatorGreaterEqual(const OperatorParams &params)
+{
+ auto options = circle::CreateGreaterOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_GREATER_EQUAL,
+ circle::BuiltinOptions_GreaterEqualOptions, options);
+}
+
uint32_t CircleGen::addOperatorL2Normalization(const OperatorParams &params)
{
auto options = circle::CreateL2NormOptions(_fbb).Union();
@@ -283,6 +297,13 @@ uint32_t CircleGen::addOperatorLess(const OperatorParams &params)
circle::BuiltinOptions_LessOptions, options);
}
+uint32_t CircleGen::addOperatorLessEqual(const OperatorParams &params)
+{
+ auto options = circle::CreateLessOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_LESS_EQUAL,
+ circle::BuiltinOptions_LessEqualOptions, options);
+}
+
uint32_t CircleGen::addOperatorLeakyRelu(const OperatorParams &params, float alpha)
{
auto options = circle::CreateLeakyReluOptions(_fbb, alpha).Union();
@@ -319,6 +340,13 @@ uint32_t CircleGen::addOperatorNeg(const OperatorParams &params)
circle::BuiltinOptions_NegOptions, options);
}
+uint32_t CircleGen::addOperatorNotEqual(const OperatorParams &params)
+{
+ auto options = circle::CreateEqualOptions(_fbb).Union();
+ return addOperatorWithOptions(params, circle::BuiltinOperator_NOT_EQUAL,
+ circle::BuiltinOptions_NotEqualOptions, options);
+}
+
uint32_t CircleGen::addOperatorOneHot(const OperatorParams &params, int32_t axis)
{
auto options = circle::CreateOneHotOptions(_fbb, axis).Union();
diff --git a/tests/nnfw_api/src/CircleGen.h b/tests/nnfw_api/src/CircleGen.h
index 062a8d35a..d780eb1bb 100644
--- a/tests/nnfw_api/src/CircleGen.h
+++ b/tests/nnfw_api/src/CircleGen.h
@@ -174,16 +174,20 @@ public:
uint32_t addOperatorFullyConnected(const OperatorParams &params,
circle::FullyConnectedOptionsWeightsFormat weights_format =
circle::FullyConnectedOptionsWeightsFormat_DEFAULT);
+ uint32_t addOperatorGreater(const OperatorParams &params);
+ uint32_t addOperatorGreaterEqual(const OperatorParams &params);
uint32_t addOperatorIf(const OperatorParams &params, uint32_t then_subg, uint32_t else_subg);
uint32_t addOperatorInstanceNorm(const OperatorParams &params, float epsilon,
circle::ActivationFunctionType actfn);
uint32_t addOperatorL2Normalization(const OperatorParams &params);
uint32_t addOperatorLeakyRelu(const OperatorParams &params, float alpha);
uint32_t addOperatorLess(const OperatorParams &params);
+ uint32_t addOperatorLessEqual(const OperatorParams &params);
uint32_t addOperatorLogSoftmax(const OperatorParams &params);
uint32_t addOperatorMul(const OperatorParams &params, circle::ActivationFunctionType actfn);
uint32_t addOperatorMean(const OperatorParams &params, bool keep_dims);
uint32_t addOperatorNeg(const OperatorParams &params);
+ uint32_t addOperatorNotEqual(const OperatorParams &params);
uint32_t addOperatorOneHot(const OperatorParams &params, int32_t axis);
uint32_t addOperatorPad(const OperatorParams &params);
uint32_t addOperatorPadV2(const OperatorParams &params);
diff --git a/tests/nnfw_api/src/GenModelTest.h b/tests/nnfw_api/src/GenModelTest.h
index eee50d112..90b7cfcad 100644
--- a/tests/nnfw_api/src/GenModelTest.h
+++ b/tests/nnfw_api/src/GenModelTest.h
@@ -398,7 +398,9 @@ protected:
// Check output tensor values
auto &ref_output = ref_outputs[i];
auto &output = _so.outputs[i];
- ASSERT_EQ(output.size(), ref_output.size());
+ auto expected_tensor_size = ref_output.size();
+ auto actual_tensor_size = output.size();
+ ASSERT_EQ(expected_tensor_size, actual_tensor_size) << "Output #" << i;
switch (ti.dtype)
{
@@ -419,9 +421,10 @@ protected:
// TODO better way for handling FP error?
for (uint32_t e = 0; e < ref_output.size() / sizeof(float); e++)
{
- float refval = reinterpret_cast<const float *>(ref_output.data())[e];
- float val = reinterpret_cast<const float *>(output.data())[e];
- EXPECT_NEAR(refval, val, 0.001) << "Output #" << i << ", Element Index : " << e;
+ float expected = reinterpret_cast<const float *>(ref_output.data())[e];
+ float actual = reinterpret_cast<const float *>(output.data())[e];
+ EXPECT_NEAR(expected, actual, 0.001)
+ << "Output #" << i << ", Element Index : " << e;
}
break;
case NNFW_TYPE_TENSOR_INT64:
@@ -445,9 +448,9 @@ private:
{
for (uint32_t e = 0; e < ref_buf.size() / sizeof(T); e++)
{
- T ref = reinterpret_cast<const T *>(ref_buf.data())[e];
- T act = reinterpret_cast<const T *>(act_buf.data())[e];
- EXPECT_EQ(ref, act) << "Output #" << index << ", Element Index : " << e;
+ T expected = reinterpret_cast<const T *>(ref_buf.data())[e];
+ T actual = reinterpret_cast<const T *>(act_buf.data())[e];
+ EXPECT_EQ(expected, actual) << "Output #" << index << ", Element Index : " << e;
}
}
@@ -457,10 +460,10 @@ private:
for (uint32_t e = 0; e < ref_buf.size() / sizeof(uint8_t); e++)
{
uint8_t ref_raw = reinterpret_cast<const uint8_t *>(ref_buf.data())[e];
- bool ref = (ref_raw != 0 ? true : false);
+ bool expected = (ref_raw != 0 ? true : false);
uint8_t act_raw = reinterpret_cast<const uint8_t *>(act_buf.data())[e];
- bool act = (act_raw != 0 ? true : false);
- EXPECT_EQ(ref, act) << "Output #" << index << ", Element Index : " << e;
+ bool actual = (act_raw != 0 ? true : false);
+ EXPECT_EQ(expected, actual) << "Output #" << index << ", Element Index : " << e;
}
}
diff --git a/tests/nnfw_api/src/GenModelTests.cc b/tests/nnfw_api/src/GenModelTests.test.cc
index 53a3571db..53a3571db 100644
--- a/tests/nnfw_api/src/GenModelTests.cc
+++ b/tests/nnfw_api/src/GenModelTests.test.cc
diff --git a/tests/nnfw_api/src/ModelTestDynamicTensor.cc b/tests/nnfw_api/src/ModelTestDynamicTensor.test.cc
index 1ed8f9581..1ed8f9581 100644
--- a/tests/nnfw_api/src/ModelTestDynamicTensor.cc
+++ b/tests/nnfw_api/src/ModelTestDynamicTensor.test.cc
diff --git a/tests/nnfw_api/src/ModelTestInputReshaping.cc b/tests/nnfw_api/src/ModelTestInputReshaping.test.cc
index f5ce3e062..f5ce3e062 100644
--- a/tests/nnfw_api/src/ModelTestInputReshaping.cc
+++ b/tests/nnfw_api/src/ModelTestInputReshaping.test.cc
diff --git a/tests/nnfw_api/src/RegressionTests.cc b/tests/nnfw_api/src/RegressionTests.test.cc
index de233390d..de233390d 100644
--- a/tests/nnfw_api/src/RegressionTests.cc
+++ b/tests/nnfw_api/src/RegressionTests.test.cc
diff --git a/tests/nnfw_api/src/ValidationTestAddModelLoaded.cc b/tests/nnfw_api/src/ValidationTestAddModelLoaded.test.cc
index 4c482369f..4c482369f 100644
--- a/tests/nnfw_api/src/ValidationTestAddModelLoaded.cc
+++ b/tests/nnfw_api/src/ValidationTestAddModelLoaded.test.cc
diff --git a/tests/nnfw_api/src/ValidationTestAddSessionPrepared.cc b/tests/nnfw_api/src/ValidationTestAddSessionPrepared.test.cc
index d668a1cb0..d668a1cb0 100644
--- a/tests/nnfw_api/src/ValidationTestAddSessionPrepared.cc
+++ b/tests/nnfw_api/src/ValidationTestAddSessionPrepared.test.cc
diff --git a/tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.cc b/tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.test.cc
index e09402b01..e09402b01 100644
--- a/tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.cc
+++ b/tests/nnfw_api/src/ValidationTestFourAddModelsSetInput.test.cc
diff --git a/tests/nnfw_api/src/ValidationTestMultipleSessions.cc b/tests/nnfw_api/src/ValidationTestMultipleSessions.test.cc
index ef00dc6bd..ef00dc6bd 100644
--- a/tests/nnfw_api/src/ValidationTestMultipleSessions.cc
+++ b/tests/nnfw_api/src/ValidationTestMultipleSessions.test.cc
diff --git a/tests/nnfw_api/src/ValidationTestPipelineSession.cc b/tests/nnfw_api/src/ValidationTestPipelineSession.test.cc
index 1d92095ed..1d92095ed 100644
--- a/tests/nnfw_api/src/ValidationTestPipelineSession.cc
+++ b/tests/nnfw_api/src/ValidationTestPipelineSession.test.cc
diff --git a/tests/nnfw_api/src/ValidationTestSessionCreated.cc b/tests/nnfw_api/src/ValidationTestSessionCreated.test.cc
index cb0791933..cb0791933 100644
--- a/tests/nnfw_api/src/ValidationTestSessionCreated.cc
+++ b/tests/nnfw_api/src/ValidationTestSessionCreated.test.cc
diff --git a/tests/nnfw_api/src/ValidationTestSingleSession.cc b/tests/nnfw_api/src/ValidationTestSingleSession.test.cc
index 852d5cd21..852d5cd21 100644
--- a/tests/nnfw_api/src/ValidationTestSingleSession.cc
+++ b/tests/nnfw_api/src/ValidationTestSingleSession.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Add.cc b/tests/nnfw_api/src/one_op_tests/Add.cc
deleted file mode 100644
index c21022972..000000000
--- a/tests/nnfw_api/src/one_op_tests/Add.cc
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-#include <memory>
-
-TEST_F(GenModelTest, OneOp_Add_VarToConst)
-{
- CircleGen cgen;
- std::vector<float> rhs_data{5, 4, 7, 4};
- uint32_t rhs_buf = cgen.addBuffer(rhs_data);
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}}, {{6, 7, 9, 8}}));
- _context->addTestCase(uniformTCD<float>({{0, 1, 2, 3}}, {{5, 5, 9, 7}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Add_VarToVar)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}, {5, 4, 7, 4}}, {{6, 7, 9, 8}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Add_VarToVarUint8)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 1);
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2);
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 4);
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<uint8_t>({{1, 3, 2, 4}, {5, 4, 7, 4}}, {{7, 8, 10, 9}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Add_VarToVarInt8)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1., 2);
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 2., 3);
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{0, -32, -46, 2}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_BroadcastAdd_VarToVarInt8)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1., 2);
- int rhs = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_INT8}, 2., 3);
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5}}, {{0, 4, 2, 6}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Add_VarToVarSame)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{in, in}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}}, {{2, 6, 4, 8}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Add_VarToVarSize0)
-{
- CircleGen cgen;
- int a = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
- int b = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
- int c = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
- int m = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{a, b}, {m}}, circle::ActivationFunctionType_NONE);
- cgen.addOperatorAdd({{m, c}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({a, b, c}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{}, {}, {}}, {{}}));
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Add_InvalidType)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2);
- int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Add_DifferentQuant8Type)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.2, -3);
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2);
- int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_INT8});
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Add_InvalidShape)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Add_InvalidShapeConst)
-{
- CircleGen cgen;
- std::vector<float> rhs_data{5, 4, 0, 7, 4, 0};
- uint32_t rhs_buf = cgen.addBuffer(rhs_data);
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
- int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Add_OneOperand)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{in}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Add_ThreeOperands)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{in, in, in}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Add_NoOutput)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{in}, {}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Add_InvalidActivation)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{lhs, rhs}, {out}},
- static_cast<circle::ActivationFunctionType>(128) /* Invalid value*/);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}, {5, 4, 7, 4}}, {{6, 7, 9, 8}}));
- _context->setBackends({"cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Add_VarToVarSize0_InvalidShape)
-{
- CircleGen cgen;
- int a = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
- int b = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
- int c = cgen.addTensor({{2}, circle::TensorType::TensorType_FLOAT32});
- int m = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{a, b}, {m}}, circle::ActivationFunctionType_NONE);
- cgen.addOperatorAdd({{m, c}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({a, b, c}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailCompile();
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, net_OneOp_Add_VarToVarInt16)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 1., 2);
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 2., 3);
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 0.5, -6);
- cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- // _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{0, -32, -46, 2}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/Add.test.cc b/tests/nnfw_api/src/one_op_tests/Add.test.cc
new file mode 100644
index 000000000..9fc0e86b6
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Add.test.cc
@@ -0,0 +1,301 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_Add_VarToConst)
+{
+ CircleGen cgen;
+ std::vector<float> rhs_data{5, 4, 7, 4};
+ uint32_t rhs_buf = cgen.addBuffer(rhs_data);
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}}, {{6, 7, 9, 8}}));
+ _context->addTestCase(uniformTCD<float>({{0, 1, 2, 3}}, {{5, 5, 9, 7}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Add_VarToVar)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}, {5, 4, 7, 4}}, {{6, 7, 9, 8}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Add_VarToVarUint8)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 1);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 4);
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<uint8_t>({{1, 3, 2, 4}, {5, 4, 7, 4}}, {{7, 8, 10, 9}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Add_VarToVarInt8)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1., 2);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 2., 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{0, -32, -46, 2}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_BroadcastAdd_VarToVarInt8)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 1., 2);
+ int rhs = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_INT8}, 2., 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.5, -6);
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5}}, {{0, 4, 2, 6}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Add_VarToVarSame)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{in, in}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}}, {{2, 6, 4, 8}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Add_VarToVarSize0)
+{
+ CircleGen cgen;
+ int a = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ int b = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ int c = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ int m = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{a, b}, {m}}, circle::ActivationFunctionType_NONE);
+ cgen.addOperatorAdd({{m, c}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({a, b, c}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{}, {}, {}}, {{}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2);
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_DifferentQuant8Type)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT8}, 0.2, -3);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 0.1, 2);
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_INT8});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_InvalidShape)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_InvalidShapeConst)
+{
+ CircleGen cgen;
+ std::vector<float> rhs_data{5, 4, 0, 7, 4, 0};
+ uint32_t rhs_buf = cgen.addBuffer(rhs_data);
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32, rhs_buf});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_OneOperand)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{in}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_ThreeOperands)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{in, in, in}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_NoOutput)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{in}, {}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_InvalidActivation)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {out}},
+ static_cast<circle::ActivationFunctionType>(128) /* Invalid value*/);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}, {5, 4, 7, 4}}, {{6, 7, 9, 8}}));
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_VarToVarSize0_InvalidShape)
+{
+ CircleGen cgen;
+ int a = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ int b = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ int c = cgen.addTensor({{2}, circle::TensorType::TensorType_FLOAT32});
+ int m = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{0}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{a, b}, {m}}, circle::ActivationFunctionType_NONE);
+ cgen.addOperatorAdd({{m, c}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({a, b, c}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailCompile();
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Add_VarToVarInt16)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 1., 2);
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 2., 3);
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT16}, 0.5, -6);
+ cgen.addOperatorAdd({{lhs, rhs}, {out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ // _context->addTestCase(uniformTCD<int8_t>({{1, 3, 2, 4}, {5, -4, -7, 4}}, {{0, -32, -46, 2}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/AddN.cc b/tests/nnfw_api/src/one_op_tests/AddN.test.cc
index 73fa82168..73fa82168 100644
--- a/tests/nnfw_api/src/one_op_tests/AddN.cc
+++ b/tests/nnfw_api/src/one_op_tests/AddN.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/ArgMinMax.cc b/tests/nnfw_api/src/one_op_tests/ArgMinMax.cc
deleted file mode 100644
index dda098698..000000000
--- a/tests/nnfw_api/src/one_op_tests/ArgMinMax.cc
+++ /dev/null
@@ -1,256 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-#include <memory>
-
-struct ArgMinMaxVariationParam
-{
- TestCaseData tcd;
- bool is_argmax = true;
- circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
- float scale = 0.0f;
- int64_t zero_point = 0;
-};
-
-class ArgMinMaxVariation : public GenModelTest,
- public ::testing::WithParamInterface<ArgMinMaxVariationParam>
-{
-};
-
-// Input shape: {1, 2, 2, 1}
-// Reduce axis: 1
-// Output shape: {1, 2, 1}
-// Output type: Int32
-// Test with different input type and value
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, ArgMinMaxVariation,
- ::testing::Values(
- // ArgMax, float input
- ArgMinMaxVariationParam{TestCaseData{}.addInput<float>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}),
- true},
- // ArgMax, int32 input
- ArgMinMaxVariationParam{
- TestCaseData{}.addInput<int32_t>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}), true,
- circle::TensorType::TensorType_INT32},
- // ArgMax, uint8 input
- ArgMinMaxVariationParam{
- TestCaseData{}.addInput<uint8_t>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}), true,
- circle::TensorType::TensorType_UINT8, 1.0, 1},
- // ArgMax, int8 input
- ArgMinMaxVariationParam{
- TestCaseData{}.addInput<int8_t>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}), true,
- circle::TensorType::TensorType_INT8, 1.0, 1},
- // ArgMin, float input
- ArgMinMaxVariationParam{TestCaseData{}.addInput<float>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}),
- false},
- // ArgMin, int32 input
- ArgMinMaxVariationParam{
- TestCaseData{}.addInput<int32_t>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}), false,
- circle::TensorType::TensorType_INT32},
- // ArgMin, uint8 input
- ArgMinMaxVariationParam{
- TestCaseData{}.addInput<uint8_t>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}), false,
- circle::TensorType::TensorType_UINT8, 1.0, 1},
- // ArgMin, int8 input
- ArgMinMaxVariationParam{
- TestCaseData{}.addInput<int8_t>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}), false,
- circle::TensorType::TensorType_INT8, 1.0, 1}));
-
-TEST_P(ArgMinMaxVariation, Test)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_INT32;
- std::vector<int32_t> axis_data{1};
- uint32_t axis_buf = cgen.addBuffer(axis_data);
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
- int out = cgen.addTensor({{1, 2, 1}, output_type});
- param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
- : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(param.tcd);
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_ArgMax_Int64_AxisToConst)
-{
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_INT64;
- std::vector<int32_t> axis_data{1};
- uint32_t axis_buf = cgen.addBuffer(axis_data);
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 1}, output_type});
- cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(TestCaseData{}.addInput<float>({1, 4, 2, 3}).addOutput<int64_t>({1, 0}));
- _context->setBackends({"acl_cl", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_ArgMax_AxisToVar)
-{
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_INT32;
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 1}, output_type});
- cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
- cgen.setInputsAndOutputs({in, axis}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(TestCaseData{}
- .addInput<float>({1, 4, 2, 3})
- .addInput<int32_t>({-3})
- .addOutput<int32_t>({1, 0}));
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-TEST_P(ArgMinMaxVariation, neg_InvalidAxis0)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_INT32;
- std::vector<int32_t> axis_data{4};
- uint32_t axis_buf = cgen.addBuffer(axis_data);
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
- int out = cgen.addTensor({{1, 2, 1}, output_type});
- param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
- : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailCompile();
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_P(ArgMinMaxVariation, neg_InvalidAxis1)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_INT32;
- std::vector<int32_t> axis_data{-3};
- uint32_t axis_buf = cgen.addBuffer(axis_data);
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
- int in = cgen.addTensor({{2, 2}, param.input_type}, param.scale, param.zero_point);
- int out = cgen.addTensor({{2}, output_type});
- param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
- : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_ArgMax_InType)
-{
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_INT32;
- std::vector<int32_t> axis_data{4};
- uint32_t axis_buf = cgen.addBuffer(axis_data);
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
- int out = cgen.addTensor({{1, 2, 1}, output_type});
- cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_P(ArgMinMaxVariation, neg_AxisType)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_INT32;
- std::vector<float> axis_data{4};
- uint32_t axis_buf = cgen.addBuffer(axis_data);
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, axis_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
- int out = cgen.addTensor({{1, 2, 1}, output_type});
- param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
- : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_ArgMax_OutType)
-{
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_FLOAT32;
- std::vector<int32_t> axis_data{4};
- uint32_t axis_buf = cgen.addBuffer(axis_data);
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 1}, output_type});
- cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_P(ArgMinMaxVariation, neg_paramType)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_INT32;
- const auto output_param = circle::TensorType::TensorType_INT64;
- std::vector<int32_t> axis_data{4};
- uint32_t axis_buf = cgen.addBuffer(axis_data);
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
- int out = cgen.addTensor({{1, 2, 1}, output_type});
- param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_param)
- : cgen.addOperatorArgMin({{in, axis}, {out}}, output_param);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/ArgMinMax.test.cc b/tests/nnfw_api/src/one_op_tests/ArgMinMax.test.cc
new file mode 100644
index 000000000..1321552db
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/ArgMinMax.test.cc
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+struct ArgMinMaxVariationParam
+{
+ TestCaseData tcd;
+ bool is_argmax = true;
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+};
+
+class ArgMinMaxVariation : public GenModelTest,
+ public ::testing::WithParamInterface<ArgMinMaxVariationParam>
+{
+};
+
+// Input shape: {1, 2, 2, 1}
+// Reduce axis: 1
+// Output shape: {1, 2, 1}
+// Output type: Int32
+// Test with different input type and value
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, ArgMinMaxVariation,
+ ::testing::Values(
+ // ArgMax, float input
+ ArgMinMaxVariationParam{TestCaseData{}.addInput<float>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}),
+ true},
+ // ArgMax, int32 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<int32_t>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}), true,
+ circle::TensorType::TensorType_INT32},
+ // ArgMax, uint8 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<uint8_t>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}), true,
+ circle::TensorType::TensorType_UINT8, 1.0, 1},
+ // ArgMax, int8 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<int8_t>({1, 4, 2, 3}).addOutput<int32_t>({1, 0}), true,
+ circle::TensorType::TensorType_INT8, 1.0, 1},
+ // ArgMin, float input
+ ArgMinMaxVariationParam{TestCaseData{}.addInput<float>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}),
+ false},
+ // ArgMin, int32 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<int32_t>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}), false,
+ circle::TensorType::TensorType_INT32},
+ // ArgMin, uint8 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<uint8_t>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}), false,
+ circle::TensorType::TensorType_UINT8, 1.0, 1},
+ // ArgMin, int8 input
+ ArgMinMaxVariationParam{
+ TestCaseData{}.addInput<int8_t>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}), false,
+ circle::TensorType::TensorType_INT8, 1.0, 1}));
+
+TEST_P(ArgMinMaxVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ std::vector<int32_t> axis_data{1};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
+ : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_ArgMax_Int64_AxisToConst)
+{
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT64;
+ std::vector<int32_t> axis_data{1};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}.addInput<float>({1, 4, 2, 3}).addOutput<int64_t>({1, 0}));
+ _context->setBackends({"acl_cl", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_ArgMax_AxisToVar)
+{
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in, axis}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}
+ .addInput<float>({1, 4, 2, 3})
+ .addInput<int32_t>({-3})
+ .addOutput<int32_t>({1, 0}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_P(ArgMinMaxVariation, neg_InvalidAxis0)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ std::vector<int32_t> axis_data{4};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
+ : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailCompile();
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_P(ArgMinMaxVariation, neg_InvalidAxis1)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ std::vector<int32_t> axis_data{-3};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{2, 2}, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{2}, output_type});
+ param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
+ : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_ArgMax_InType)
+{
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ std::vector<int32_t> axis_data{4};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_P(ArgMinMaxVariation, neg_AxisType)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ std::vector<float> axis_data{4};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
+ : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_ArgMax_OutType)
+{
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_FLOAT32;
+ std::vector<int32_t> axis_data{4};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_P(ArgMinMaxVariation, neg_paramType)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ const auto output_param = circle::TensorType::TensorType_INT64;
+ std::vector<int32_t> axis_data{4};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_param)
+ : cgen.addOperatorArgMin({{in, axis}, {out}}, output_param);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc b/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc
deleted file mode 100644
index 15ddac210..000000000
--- a/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-struct AvgPool2DParam
-{
- TestCaseData tcd;
- std::vector<int32_t> input_shape;
- std::vector<int32_t> output_shape;
- struct filter_stride
- {
- int32_t filter_w;
- int32_t filter_h;
- int32_t stride_w;
- int32_t stride_h;
- } param = {1, 1, 1, 1};
- struct data_type
- {
- circle::TensorType data_type;
- float scale;
- int64_t zero_point;
- } type = {circle::TensorType::TensorType_FLOAT32, 0.0f, 0};
- std::vector<std::string> backend = {"acl_cl", "acl_neon", "cpu", "gpu_cl"};
-};
-
-class AveragePool2DVariation : public GenModelTest,
- public ::testing::WithParamInterface<AvgPool2DParam>
-{
-};
-
-// Test with different input type and value
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, AveragePool2DVariation,
- ::testing::Values(
- // float data
- AvgPool2DParam{
- uniformTCD<float>({{1, 3, 2, 4}}, {{2.5}}), {1, 2, 2, 1}, {1, 1, 1, 1}, {2, 2, 2, 2}},
- // float data - large
- AvgPool2DParam{uniformTCD<float>({std::vector<float>(18 * 36 * 2, 99)}, {{99, 99, 99, 99}}),
- {1, 18, 36, 2},
- {1, 1, 2, 2},
- {18, 18, 18, 18}},
- // uint8_t data
- AvgPool2DParam{uniformTCD<uint8_t>({{2, 6, 4, 8}}, {{5}}),
- {1, 2, 2, 1},
- {1, 1, 1, 1},
- {2, 2, 2, 2},
- {circle::TensorType::TensorType_UINT8, 1.2, 3},
- {"acl_cl", "acl_neon", "cpu"}},
- // uint8_t data -large
- AvgPool2DParam{
- uniformTCD<uint8_t>({{std::vector<uint8_t>(18 * 36 * 2, 99)}}, {{99, 99, 99, 99}}),
- {1, 18, 36, 2},
- {1, 1, 2, 2},
- {18, 18, 18, 18},
- {circle::TensorType::TensorType_UINT8, 1.2, 3},
- {"acl_cl", "acl_neon", "cpu"}},
- // int8_t data
- // TODO enable acl-cl, acl-neon backend
- AvgPool2DParam{uniformTCD<int8_t>({{2, -6, 4, -8}}, {{-2}}),
- {1, 2, 2, 1},
- {1, 1, 1, 1},
- {2, 2, 2, 2},
- {circle::TensorType::TensorType_INT8, 2.0, -1},
- {"cpu"}},
- // int8_t data - large
- // TODO enable acl-cl, acl-neon backend
- AvgPool2DParam{
- uniformTCD<int8_t>({{std::vector<int8_t>(18 * 36 * 2, -99)}}, {{-99, -99, -99, -99}}),
- {1, 18, 36, 2},
- {1, 1, 2, 2},
- {18, 18, 18, 18},
- {circle::TensorType::TensorType_INT8, 2.0, -1},
- {"cpu"}}));
-
-TEST_P(AveragePool2DVariation, Test)
-{
- auto &param = GetParam();
- CircleGen cgen;
-
- int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
- param.param.stride_h, param.param.filter_w, param.param.filter_h,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(param.tcd);
- _context->setBackends(param.backend);
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_AvgPool2D_3DInput)
-{
- // 3D Tensors are not supported
- CircleGen cgen;
- int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_AvgPool2D_2DInput)
-{
- // 2D Tensors are not supported
- CircleGen cgen;
- int in = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-TEST_P(AveragePool2DVariation, neg_InvalidPaddingType)
-{
- auto &param = GetParam();
- CircleGen cgen;
-
- int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- cgen.addOperatorAveragePool2D({{in}, {out}}, static_cast<circle::Padding>(99),
- param.param.stride_w, param.param.stride_h, param.param.filter_w,
- param.param.filter_h, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_P(AveragePool2DVariation, neg_InvalidFilterSize_1)
-{
- auto &param = GetParam();
- CircleGen cgen;
-
- int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
- param.param.stride_h, -1, param.param.filter_h,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_P(AveragePool2DVariation, neg_InvalidFilterSize_2)
-{
- auto &param = GetParam();
- CircleGen cgen;
-
- int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
- param.param.stride_h, param.param.filter_w, 0,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_P(AveragePool2DVariation, neg_InvalidStrides_1)
-{
- auto &param = GetParam();
- CircleGen cgen;
-
- int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 0, param.param.stride_h,
- param.param.filter_w, param.param.filter_h,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_P(AveragePool2DVariation, neg_InvalidStrides_2)
-{
- auto &param = GetParam();
- CircleGen cgen;
-
- int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w, -100,
- param.param.filter_w, param.param.filter_h,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/AveragePool2D.test.cc b/tests/nnfw_api/src/one_op_tests/AveragePool2D.test.cc
new file mode 100644
index 000000000..8276ca4c1
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/AveragePool2D.test.cc
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct AvgPool2DParam
+{
+ TestCaseData tcd;
+ std::vector<int32_t> input_shape;
+ std::vector<int32_t> output_shape;
+ struct filter_stride
+ {
+ int32_t filter_w;
+ int32_t filter_h;
+ int32_t stride_w;
+ int32_t stride_h;
+ } param = {1, 1, 1, 1};
+ struct data_type
+ {
+ circle::TensorType data_type;
+ float scale;
+ int64_t zero_point;
+ } type = {circle::TensorType::TensorType_FLOAT32, 0.0f, 0};
+ std::vector<std::string> backend = {"acl_cl", "acl_neon", "cpu", "gpu_cl"};
+};
+
+class AveragePool2DVariation : public GenModelTest,
+ public ::testing::WithParamInterface<AvgPool2DParam>
+{
+};
+
+// Test with different input type and value
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, AveragePool2DVariation,
+ ::testing::Values(
+ // float data
+ AvgPool2DParam{
+ uniformTCD<float>({{1, 3, 2, 4}}, {{2.5}}), {1, 2, 2, 1}, {1, 1, 1, 1}, {2, 2, 2, 2}},
+ // float data - large
+ AvgPool2DParam{uniformTCD<float>({std::vector<float>(18 * 36 * 2, 99)}, {{99, 99, 99, 99}}),
+ {1, 18, 36, 2},
+ {1, 1, 2, 2},
+ {18, 18, 18, 18}},
+ // uint8_t data
+ AvgPool2DParam{uniformTCD<uint8_t>({{2, 6, 4, 8}}, {{5}}),
+ {1, 2, 2, 1},
+ {1, 1, 1, 1},
+ {2, 2, 2, 2},
+ {circle::TensorType::TensorType_UINT8, 1.2, 3},
+ {"acl_cl", "acl_neon", "cpu"}},
+ // uint8_t data -large
+ AvgPool2DParam{
+ uniformTCD<uint8_t>({{std::vector<uint8_t>(18 * 36 * 2, 99)}}, {{99, 99, 99, 99}}),
+ {1, 18, 36, 2},
+ {1, 1, 2, 2},
+ {18, 18, 18, 18},
+ {circle::TensorType::TensorType_UINT8, 1.2, 3},
+ {"acl_cl", "acl_neon", "cpu"}},
+ // int8_t data
+ // TODO enable acl-cl, acl-neon backend
+ AvgPool2DParam{uniformTCD<int8_t>({{2, -6, 4, -8}}, {{-2}}),
+ {1, 2, 2, 1},
+ {1, 1, 1, 1},
+ {2, 2, 2, 2},
+ {circle::TensorType::TensorType_INT8, 2.0, -1},
+ {"cpu"}},
+ // int8_t data - large
+ // TODO enable acl-cl, acl-neon backend
+ AvgPool2DParam{
+ uniformTCD<int8_t>({{std::vector<int8_t>(18 * 36 * 2, -99)}}, {{-99, -99, -99, -99}}),
+ {1, 18, 36, 2},
+ {1, 1, 2, 2},
+ {18, 18, 18, 18},
+ {circle::TensorType::TensorType_INT8, 2.0, -1},
+ {"cpu"}}));
+
+TEST_P(AveragePool2DVariation, Test)
+{
+ auto &param = GetParam();
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
+ param.param.stride_h, param.param.filter_w, param.param.filter_h,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends(param.backend);
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_AvgPool2D_3DInput)
+{
+ // 3D Tensors are not supported
+ CircleGen cgen;
+ int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_AvgPool2D_2DInput)
+{
+ // 2D Tensors are not supported
+ CircleGen cgen;
+ int in = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_P(AveragePool2DVariation, neg_InvalidPaddingType)
+{
+ auto &param = GetParam();
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, static_cast<circle::Padding>(99),
+ param.param.stride_w, param.param.stride_h, param.param.filter_w,
+ param.param.filter_h, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_P(AveragePool2DVariation, neg_InvalidFilterSize_1)
+{
+ auto &param = GetParam();
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
+ param.param.stride_h, -1, param.param.filter_h,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_P(AveragePool2DVariation, neg_InvalidFilterSize_2)
+{
+ auto &param = GetParam();
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
+ param.param.stride_h, param.param.filter_w, 0,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_P(AveragePool2DVariation, neg_InvalidStrides_1)
+{
+ auto &param = GetParam();
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 0, param.param.stride_h,
+ param.param.filter_w, param.param.filter_h,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_P(AveragePool2DVariation, neg_InvalidStrides_2)
+{
+ auto &param = GetParam();
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w, -100,
+ param.param.filter_w, param.param.filter_h,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/BatchToSpaceND.cc b/tests/nnfw_api/src/one_op_tests/BatchToSpaceND.test.cc
index 3f4554302..3f4554302 100644
--- a/tests/nnfw_api/src/one_op_tests/BatchToSpaceND.cc
+++ b/tests/nnfw_api/src/one_op_tests/BatchToSpaceND.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Cast.cc b/tests/nnfw_api/src/one_op_tests/Cast.cc
deleted file mode 100644
index 928df2d24..000000000
--- a/tests/nnfw_api/src/one_op_tests/Cast.cc
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-#include <memory>
-
-CircleGen genSimpleCastModel(circle::TensorType from_t, circle::TensorType to_t)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, from_t});
- int out = cgen.addTensor({{1, 2, 2, 1}, to_t});
- cgen.addOperatorCast({{in}, {out}}, from_t, to_t);
- cgen.setInputsAndOutputs({in}, {out});
- return cgen;
-}
-
-TEST_F(GenModelTest, OneOp_Cast_Int32ToFloat32)
-{
- CircleGen cgen = genSimpleCastModel(circle::TensorType_INT32, circle::TensorType_FLOAT32);
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- TestCaseData{}.addInput<int32_t>({1, 2, 3, 4}).addOutput<float>({1, 2, 3, 4}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Cast_Float32ToInt32)
-{
- CircleGen cgen = genSimpleCastModel(circle::TensorType_FLOAT32, circle::TensorType_INT32);
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- TestCaseData{}.addInput<float>({1, 2, 3, 4}).addOutput<int32_t>({1, 2, 3, 4}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Cast_BoolToFloat32)
-{
- CircleGen cgen = genSimpleCastModel(circle::TensorType_BOOL, circle::TensorType_FLOAT32);
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- TestCaseData{}.addInput<bool>({true, false, true, true}).addOutput<float>({1, 0, 1, 1}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Cast_BoolToUInt8)
-{
- CircleGen cgen = genSimpleCastModel(circle::TensorType_BOOL, circle::TensorType_UINT8);
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(TestCaseData{}
- .addInput<bool>({true, false, true, true})
- .addOutput(std::vector<uint8_t>{1, 0, 1, 1}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Cast_BoolToInt32)
-{
- CircleGen cgen = genSimpleCastModel(circle::TensorType_BOOL, circle::TensorType_INT32);
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- TestCaseData{}.addInput<bool>({true, false, true, true}).addOutput<int32_t>({1, 0, 1, 1}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Cast_AfterEqual)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int equal_out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorEqual({{lhs, rhs}, {equal_out}});
- cgen.addOperatorCast({{equal_out}, {out}}, circle::TensorType::TensorType_BOOL,
- circle::TensorType::TensorType_FLOAT32);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}, {2, 3, 1, 4}}, {{0, 1, 0, 1}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Cast_InvalidInputCount0)
-{
- CircleGen cgen;
- int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
- cgen.addOperatorCast({{}, {out}}, circle::TensorType::TensorType_FLOAT32,
- circle::TensorType::TensorType_INT32);
- cgen.setInputsAndOutputs({}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Cast_InvalidInputCount2)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
- int out = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorCast({{lhs, rhs}, {out}}, circle::TensorType::TensorType_INT32,
- circle::TensorType::TensorType_FLOAT32);
- cgen.setInputsAndOutputs({lhs, rhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Cast_InvalidOutputCount0)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
- cgen.addOperatorCast({{in}, {}}, circle::TensorType::TensorType_INT32,
- circle::TensorType::TensorType_FLOAT32);
- cgen.setInputsAndOutputs({in}, {});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Cast_InvalidOutputCount2)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
- int out1 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out2 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
- cgen.addOperatorCast({{in}, {out1, out2}}, circle::TensorType::TensorType_INT32,
- circle::TensorType::TensorType_FLOAT32);
- cgen.setInputsAndOutputs({in}, {out1, out2});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/Cast.test.cc b/tests/nnfw_api/src/one_op_tests/Cast.test.cc
new file mode 100644
index 000000000..b4cfa6f8f
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Cast.test.cc
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+CircleGen genSimpleCastModel(circle::TensorType from_t, circle::TensorType to_t)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, from_t});
+ int out = cgen.addTensor({{1, 2, 2, 1}, to_t});
+ cgen.addOperatorCast({{in}, {out}}, from_t, to_t);
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen;
+}
+
+TEST_F(GenModelTest, OneOp_Cast_Int32ToFloat32)
+{
+ CircleGen cgen = genSimpleCastModel(circle::TensorType_INT32, circle::TensorType_FLOAT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<int32_t>({1, 2, 3, 4}).addOutput<float>({1, 2, 3, 4}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Cast_Float32ToInt32)
+{
+ CircleGen cgen = genSimpleCastModel(circle::TensorType_FLOAT32, circle::TensorType_INT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<float>({1, 2, 3, 4}).addOutput<int32_t>({1, 2, 3, 4}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Cast_BoolToFloat32)
+{
+ CircleGen cgen = genSimpleCastModel(circle::TensorType_BOOL, circle::TensorType_FLOAT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<bool>({true, false, true, true}).addOutput<float>({1, 0, 1, 1}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Cast_BoolToUInt8)
+{
+ CircleGen cgen = genSimpleCastModel(circle::TensorType_BOOL, circle::TensorType_UINT8);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}
+ .addInput<bool>({true, false, true, true})
+ .addOutput(std::vector<uint8_t>{1, 0, 1, 1}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Cast_BoolToInt32)
+{
+ CircleGen cgen = genSimpleCastModel(circle::TensorType_BOOL, circle::TensorType_INT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<bool>({true, false, true, true}).addOutput<int32_t>({1, 0, 1, 1}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Cast_Uint8ToFloat32)
+{
+ CircleGen cgen = genSimpleCastModel(circle::TensorType_UINT8, circle::TensorType_FLOAT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ // clang-format off
+ _context->addTestCase(
+ TestCaseData{}.addInput<uint8_t>({0, 100, 200, 255})
+ .addOutput<float>({0., 100., 200., 255.}));
+ // clang-format on
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Cast_Int64ToFloat32)
+{
+ CircleGen cgen = genSimpleCastModel(circle::TensorType_INT64, circle::TensorType_FLOAT32);
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(TestCaseData{}
+ .addInput<int64_t>({-12345, 3, 100, 2147483648})
+ .addOutput<float>({-12345., 3., 100., 2147483648.}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Cast_AfterEqual)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int equal_out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorEqual({{lhs, rhs}, {equal_out}});
+ cgen.addOperatorCast({{equal_out}, {out}}, circle::TensorType::TensorType_BOOL,
+ circle::TensorType::TensorType_FLOAT32);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 3, 2, 4}, {2, 3, 1, 4}}, {{0, 1, 0, 1}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Cast_InvalidInputCount0)
+{
+ CircleGen cgen;
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorCast({{}, {out}}, circle::TensorType::TensorType_FLOAT32,
+ circle::TensorType::TensorType_INT32);
+ cgen.setInputsAndOutputs({}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Cast_InvalidInputCount2)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorCast({{lhs, rhs}, {out}}, circle::TensorType::TensorType_INT32,
+ circle::TensorType::TensorType_FLOAT32);
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Cast_InvalidOutputCount0)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorCast({{in}, {}}, circle::TensorType::TensorType_INT32,
+ circle::TensorType::TensorType_FLOAT32);
+ cgen.setInputsAndOutputs({in}, {});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Cast_InvalidOutputCount2)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out1 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out2 = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorCast({{in}, {out1, out2}}, circle::TensorType::TensorType_INT32,
+ circle::TensorType::TensorType_FLOAT32);
+ cgen.setInputsAndOutputs({in}, {out1, out2});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Concat.cc b/tests/nnfw_api/src/one_op_tests/Concat.cc
deleted file mode 100644
index f4397ba66..000000000
--- a/tests/nnfw_api/src/one_op_tests/Concat.cc
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-#include <memory>
-
-TEST_F(GenModelTest, OneOp_Concat_ShareSubTensor)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int shared_subtensor = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int concat_out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
- std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
- uint32_t padding_buf = cgen.addBuffer(padding_data);
- int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
- int pad_out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAdd({{lhs, rhs}, {shared_subtensor}}, circle::ActivationFunctionType_NONE);
- cgen.addOperatorConcatenation({{rhs, shared_subtensor}, {concat_out}}, 3,
- circle::ActivationFunctionType_NONE);
- cgen.addOperatorPad({{shared_subtensor, padding}, {pad_out}});
- cgen.setInputsAndOutputs({lhs, rhs}, {pad_out, concat_out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>(
- {{1, 3, 2, 4}, {5, 4, 7, 4}},
- {{0, 0, 0, 0, 0, 6, 7, 0, 0, 9, 8, 0, 0, 0, 0, 0}, {5, 6, 4, 7, 7, 9, 4, 8}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-struct ConcatVariationParam
-{
- TestCaseData tcd;
- circle::TensorType type = circle::TensorType::TensorType_FLOAT32;
- float scale = 0.0f;
- int64_t zero_point = 0;
-};
-
-class ConcatVariation : public GenModelTest,
- public ::testing::WithParamInterface<ConcatVariationParam>
-{
-};
-
-// Input shape: {2, 3} / {2, 3}
-// Output shape: {4, 3}
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, ConcatVariation,
- ::testing::Values(
- // Float
- ConcatVariationParam{uniformTCD<float>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
- {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}})},
- // Uint8
- ConcatVariationParam{uniformTCD<uint8_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
- {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
- circle::TensorType::TensorType_UINT8, 1.0f, -2},
- // Int8
- ConcatVariationParam{uniformTCD<int8_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
- {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
- circle::TensorType::TensorType_INT8, 1.0f, -2},
- // Int16
- // TODO Enable when nnfw api support int16 type
- // ConcatVariationParam{
- // uniformTCD<int16_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
- // {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
- // circle::TensorType::TensorType_INT16, 1.0f, 0},
- // Int32
- ConcatVariationParam{uniformTCD<int32_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
- {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
- circle::TensorType::TensorType_INT32},
- // Int64
- ConcatVariationParam{uniformTCD<int64_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
- {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
- circle::TensorType::TensorType_INT64}));
-
-TEST_P(ConcatVariation, Test)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
- int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
- int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
- cgen.addOperatorConcatenation({{input1, input2}, {output}}, 0,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({input1, input2}, {output});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(param.tcd);
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Concat_Subtensor_4D)
-{
- CircleGen cgen;
- int in1 = cgen.addTensor({{1, 1, 1, 20}, circle::TensorType::TensorType_FLOAT32});
- int in2 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
- std::vector<int32_t> axis_data{3};
- uint32_t axis_buf = cgen.addBuffer(axis_data);
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
-
- int s_out1 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32});
- int s_out2 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32});
- int s_out3 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32});
- int s_out4 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32});
-
- int c_out1 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
- int c_out2 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
- int c_out3 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
-
- int a_out1 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
- int a_out2 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
- int a_out3 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
-
- int final_out = cgen.addTensor({{1, 1, 1, 35}, circle::TensorType::TensorType_FLOAT32});
-
- cgen.addOperatorSplit({{axis, in1}, {s_out1, s_out2, s_out3, s_out4}}, 4);
-
- cgen.addOperatorConcatenation({{s_out1, s_out2}, {c_out1}}, 3,
- circle::ActivationFunctionType::ActivationFunctionType_NONE);
- cgen.addOperatorConcatenation({{s_out1, s_out3}, {c_out2}}, 3,
- circle::ActivationFunctionType::ActivationFunctionType_NONE);
- cgen.addOperatorConcatenation({{s_out1, s_out4}, {c_out3}}, 3,
- circle::ActivationFunctionType::ActivationFunctionType_NONE);
-
- cgen.addOperatorAdd({{c_out1, in2}, {a_out1}},
- circle::ActivationFunctionType::ActivationFunctionType_NONE);
- cgen.addOperatorAdd({{c_out2, in2}, {a_out2}},
- circle::ActivationFunctionType::ActivationFunctionType_NONE);
- cgen.addOperatorAdd({{c_out3, in2}, {a_out3}},
- circle::ActivationFunctionType::ActivationFunctionType_NONE);
-
- cgen.addOperatorConcatenation({{s_out1, a_out1, a_out2, a_out3}, {final_out}}, 3,
- circle::ActivationFunctionType::ActivationFunctionType_NONE);
-
- cgen.setInputsAndOutputs({in1, in2}, {s_out1, s_out2, s_out3, s_out4, c_out1, c_out2, c_out3,
- a_out1, a_out2, a_out3, final_out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>(
- {
- // inputs
- {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, // in1
- {0, 0, 0, 0, 0, 0, 0, 0, 0, 0} // in2
- },
- {
- // outputs
- {1, 2, 3, 4, 5}, // s_out1
- {6, 7, 8, 9, 10}, // s_out2
- {11, 12, 13, 14, 15}, // s_out3
- {16, 17, 18, 19, 20}, // s_out4
- {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, // c_out1
- {1, 2, 3, 4, 5, 11, 12, 13, 14, 15}, // c_out2
- {1, 2, 3, 4, 5, 16, 17, 18, 19, 20}, // c_out3
- {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, // a_out1
- {1, 2, 3, 4, 5, 11, 12, 13, 14, 15}, // a_out2
- {1, 2, 3, 4, 5, 16, 17, 18, 19, 20}, // a_out3
- {1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3,
- 4, 5, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 16, 17, 18, 19, 20} // final_out
- }));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_P(ConcatVariation, neg_InvalidAxis)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
- int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
- int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
- int axis = 2;
-
- cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({input1, input2}, {output});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-TEST_P(ConcatVariation, neg_InvalidRank)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
- int input2 = cgen.addTensor({{1, 2, 3}, param.type}, param.scale, param.zero_point);
- int output = cgen.addTensor({{1, 4, 3}, param.type}, param.scale, param.zero_point);
- int axis = 0;
-
- cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({input1, input2}, {output});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-TEST_P(ConcatVariation, neg_InvalidDimension)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
- int input2 = cgen.addTensor({{3, 2}, param.type}, param.scale, param.zero_point);
- int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
- int axis = 0;
-
- cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({input1, input2}, {output});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/Concat.test.cc b/tests/nnfw_api/src/one_op_tests/Concat.test.cc
new file mode 100644
index 000000000..4f8360353
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Concat.test.cc
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_Concat_ShareSubTensor)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int shared_subtensor = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int concat_out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ int pad_out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{lhs, rhs}, {shared_subtensor}}, circle::ActivationFunctionType_NONE);
+ cgen.addOperatorConcatenation({{rhs, shared_subtensor}, {concat_out}}, 3,
+ circle::ActivationFunctionType_NONE);
+ cgen.addOperatorPad({{shared_subtensor, padding}, {pad_out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {pad_out, concat_out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>(
+ {{1, 3, 2, 4}, {5, 4, 7, 4}},
+ {{0, 0, 0, 0, 0, 6, 7, 0, 0, 9, 8, 0, 0, 0, 0, 0}, {5, 6, 4, 7, 7, 9, 4, 8}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+struct ConcatVariationParam
+{
+ TestCaseData tcd;
+ circle::TensorType type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+};
+
+class ConcatVariation : public GenModelTest,
+ public ::testing::WithParamInterface<ConcatVariationParam>
+{
+};
+
+// Input shape: {2, 3} / {2, 3}
+// Output shape: {4, 3}
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, ConcatVariation,
+ ::testing::Values(
+ // Float
+ ConcatVariationParam{uniformTCD<float>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}})},
+ // Uint8
+ ConcatVariationParam{uniformTCD<uint8_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
+ circle::TensorType::TensorType_UINT8, 1.0f, -2},
+ // Int8
+ ConcatVariationParam{uniformTCD<int8_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
+ circle::TensorType::TensorType_INT8, 1.0f, -2},
+ // Int16
+ // TODO Enable when nnfw api support int16 type
+ // ConcatVariationParam{
+ // uniformTCD<int16_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ // {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
+ // circle::TensorType::TensorType_INT16, 1.0f, 0},
+ // Int32
+ ConcatVariationParam{uniformTCD<int32_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
+ circle::TensorType::TensorType_INT32},
+ // Int64
+ ConcatVariationParam{uniformTCD<int64_t>({{1, 2, 3, 4, 5, 6}, {7, 8, 9, 10, 11, 12}},
+ {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
+ circle::TensorType::TensorType_INT64}));
+
+TEST_P(ConcatVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
+ cgen.addOperatorConcatenation({{input1, input2}, {output}}, 0,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({input1, input2}, {output});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Concat_Subtensor_4D)
+{
+ CircleGen cgen;
+ int in1 = cgen.addTensor({{1, 1, 1, 20}, circle::TensorType::TensorType_FLOAT32});
+ int in2 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<int32_t> axis_data{3};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+
+ int s_out1 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32});
+ int s_out2 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32});
+ int s_out3 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32});
+ int s_out4 = cgen.addTensor({{1, 1, 1, 5}, circle::TensorType::TensorType_FLOAT32});
+
+ int c_out1 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
+ int c_out2 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
+ int c_out3 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
+
+ int a_out1 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
+ int a_out2 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
+ int a_out3 = cgen.addTensor({{1, 1, 1, 10}, circle::TensorType::TensorType_FLOAT32});
+
+ int final_out = cgen.addTensor({{1, 1, 1, 35}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorSplit({{axis, in1}, {s_out1, s_out2, s_out3, s_out4}}, 4);
+
+ cgen.addOperatorConcatenation({{s_out1, s_out2}, {c_out1}}, 3,
+ circle::ActivationFunctionType::ActivationFunctionType_NONE);
+ cgen.addOperatorConcatenation({{s_out1, s_out3}, {c_out2}}, 3,
+ circle::ActivationFunctionType::ActivationFunctionType_NONE);
+ cgen.addOperatorConcatenation({{s_out1, s_out4}, {c_out3}}, 3,
+ circle::ActivationFunctionType::ActivationFunctionType_NONE);
+
+ cgen.addOperatorAdd({{c_out1, in2}, {a_out1}},
+ circle::ActivationFunctionType::ActivationFunctionType_NONE);
+ cgen.addOperatorAdd({{c_out2, in2}, {a_out2}},
+ circle::ActivationFunctionType::ActivationFunctionType_NONE);
+ cgen.addOperatorAdd({{c_out3, in2}, {a_out3}},
+ circle::ActivationFunctionType::ActivationFunctionType_NONE);
+
+ cgen.addOperatorConcatenation({{s_out1, a_out1, a_out2, a_out3}, {final_out}}, 3,
+ circle::ActivationFunctionType::ActivationFunctionType_NONE);
+
+ cgen.setInputsAndOutputs({in1, in2}, {s_out1, s_out2, s_out3, s_out4, c_out1, c_out2, c_out3,
+ a_out1, a_out2, a_out3, final_out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>(
+ {
+ // inputs
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, // in1
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0} // in2
+ },
+ {
+ // outputs
+ {1, 2, 3, 4, 5}, // s_out1
+ {6, 7, 8, 9, 10}, // s_out2
+ {11, 12, 13, 14, 15}, // s_out3
+ {16, 17, 18, 19, 20}, // s_out4
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, // c_out1
+ {1, 2, 3, 4, 5, 11, 12, 13, 14, 15}, // c_out2
+ {1, 2, 3, 4, 5, 16, 17, 18, 19, 20}, // c_out3
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, // a_out1
+ {1, 2, 3, 4, 5, 11, 12, 13, 14, 15}, // a_out2
+ {1, 2, 3, 4, 5, 16, 17, 18, 19, 20}, // a_out3
+ {1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3,
+ 4, 5, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 16, 17, 18, 19, 20} // final_out
+ }));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_P(ConcatVariation, neg_InvalidAxis)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
+ int axis = 2;
+
+ cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({input1, input2}, {output});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_P(ConcatVariation, neg_InvalidRank)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int input2 = cgen.addTensor({{1, 2, 3}, param.type}, param.scale, param.zero_point);
+ int output = cgen.addTensor({{1, 4, 3}, param.type}, param.scale, param.zero_point);
+ int axis = 0;
+
+ cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({input1, input2}, {output});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_P(ConcatVariation, neg_InvalidDimension)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int input2 = cgen.addTensor({{3, 2}, param.type}, param.scale, param.zero_point);
+ int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
+ int axis = 0;
+
+ cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({input1, input2}, {output});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Conv2D.cc b/tests/nnfw_api/src/one_op_tests/Conv2D.cc
deleted file mode 100644
index 4f58e3d53..000000000
--- a/tests/nnfw_api/src/one_op_tests/Conv2D.cc
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-TEST_F(GenModelTest, OneOp_Conv2D)
-{
- CircleGen cgen;
- std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{2, 3};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
- circle::ActivationFunctionType_NONE, 1, 1);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>(
- {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}},
- {{47, -4, -25, 9, 10, 10, -13, 11, -14, -26, -12, 26, 20, 40, 1, 3, 11, 4}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "ruy", "xnnpack", "gpu_cl"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Conv2D_Stride)
-{
- CircleGen cgen;
- std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{2, 3};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 2, 2,
- circle::ActivationFunctionType_NONE, 1, 1);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>(
- {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}},
- {{22, 27, -10, -2, 5, -8, 7, 3, -14, -26, -10, 18, 4, -13, -28, 9, 14, 1}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "ruy", "xnnpack"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Conv2D_Dilation)
-{
- CircleGen cgen;
- std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{2, 3};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
- circle::ActivationFunctionType_NONE, 2, 2);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>(
- {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}},
- {{-52, 7}}));
- _context->setBackends({"cpu", "ruy", "xnnpack"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Conv2D_I8)
-{
- CircleGen cgen;
- std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<int32_t> bias_data{0, 2, 4};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
- int weight =
- cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, 0.5, 0);
- int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
- int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
- cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<int8_t>({{10, 10, 10}}, {{15, 38, 61}}));
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Conv2D_I8_PerChannel)
-{
- CircleGen cgen;
- std::vector<int8_t> weight_data{1, 2, 3, 1, 2, 3, 7, 8, 9};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<int32_t> bias_data{0, 0, 0};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
- std::vector<float> weight_scales = {0.5, 1, 0.5};
- std::vector<int64_t> weight_zeropoints = {0, 0, 0};
- int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf},
- weight_scales, weight_zeropoints);
- int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
- int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
- cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<int8_t>({{10, 10, 10}}, {{15, 30, 60}}));
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Conv2D_Type)
-{
- CircleGen cgen;
- std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{2, 3};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT16});
- cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
- circle::ActivationFunctionType_NONE, 1, 1);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Conv2D_Stride)
-{
- CircleGen cgen;
- std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{2, 3};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 0, 0,
- circle::ActivationFunctionType_NONE, 1, 1);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Conv2D_Dilation)
-{
- CircleGen cgen;
- std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{2, 3};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
- circle::ActivationFunctionType_NONE, 0, 0);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Conv2D_I8_NonZero_ZeroPoint)
-{
- CircleGen cgen;
- std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<int32_t> bias_data{0, 2, 4};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
- int weight =
- cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, 0.5, 17);
- int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
- int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
- cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Conv2D_I8_NonZero_ZeroPoints)
-{
- CircleGen cgen;
- std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<int32_t> bias_data{0, 2, 4};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
- std::vector<float> weight_scales = {0.5, 1, 0.5};
- std::vector<int64_t> weight_zeropoints = {0, 0, 10};
- int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf},
- weight_scales, weight_zeropoints);
- int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
- int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32}, 1.0, 0);
- cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/Conv2D.test.cc b/tests/nnfw_api/src/one_op_tests/Conv2D.test.cc
new file mode 100644
index 000000000..dccf2e5b8
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Conv2D.test.cc
@@ -0,0 +1,278 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_Conv2D)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE, 1, 1);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>(
+ {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}},
+ {{47, -4, -25, 9, 10, 10, -13, 11, -14, -26, -12, 26, 20, 40, 1, 3, 11, 4}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "ruy", "xnnpack", "gpu_cl"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Conv2D_Stride)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 2, 2,
+ circle::ActivationFunctionType_NONE, 1, 1);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>(
+ {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}},
+ {{22, 27, -10, -2, 5, -8, 7, 3, -14, -26, -10, 18, 4, -13, -28, 9, 14, 1}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "ruy", "xnnpack"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Conv2D_Dilation)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE, 2, 2);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>(
+ {{4, 0, -5, 1, 0, 4, -1, 1, -1, -3, 3, -2, -4, 1, -2, 2, 4, -4, 2, 2, 0, 4, -1, -2, 4}},
+ {{-52, 7}}));
+ _context->setBackends({"cpu", "ruy", "xnnpack"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Conv2D_I8)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 2, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
+ int weight =
+ cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, 0.5, 0);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{10, 10, 10}}, {{15, 38, 61}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Conv2D_I8_PerChannel)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 1, 2, 3, 7, 8, 9};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 0, 0};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
+ std::vector<float> weight_scales = {0.5, 1, 0.5};
+ std::vector<int64_t> weight_zeropoints = {0, 0, 0};
+ int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<int8_t>({{10, 10, 10}}, {{15, 30, 60}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Conv2D_U8_PerChannel)
+{
+ CircleGen cgen;
+ // weight
+ std::vector<uint8_t> weight_data{2, 6, 2, 1, 2, 3, 2, 3, 4};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> weight_scales = {.5, 1, 2};
+ std::vector<int64_t> weight_zeropoints = {2, 0, 1};
+ int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_UINT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ // bias
+ std::vector<int32_t> bias_data{4, -8, -4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1., 0);
+
+ // in and out
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_UINT8}, 2., 1);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_UINT8}, 4., 2);
+
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<uint8_t>({{5, 3, 7}}, {{5, 11, 24}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Conv2D_Type)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT16});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE, 1, 1);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Conv2D_Stride)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 0, 0,
+ circle::ActivationFunctionType_NONE, 1, 1);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Conv2D_Dilation)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{-2, 3, -5, 3, 4, 4, 0, 0, -4, -1, -4, -2, 0, 2, 0, -1, 4, 0};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{2, 3};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 5, 5, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{2, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE, 0, 0);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Conv2D_I8_NonZero_ZeroPoint)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 2, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
+ int weight =
+ cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, 0.5, 17);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Conv2D_I8_NonZero_ZeroPoints)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 2, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
+ std::vector<float> weight_scales = {0.5, 1, 0.5};
+ std::vector<int64_t> weight_zeropoints = {0, 0, 10};
+ int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32}, 1.0, 0);
+ cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Cos.cc b/tests/nnfw_api/src/one_op_tests/Cos.test.cc
index 03944746a..03944746a 100644
--- a/tests/nnfw_api/src/one_op_tests/Cos.cc
+++ b/tests/nnfw_api/src/one_op_tests/Cos.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/DepthToSpace.cc b/tests/nnfw_api/src/one_op_tests/DepthToSpace.cc
deleted file mode 100644
index a4fe88493..000000000
--- a/tests/nnfw_api/src/one_op_tests/DepthToSpace.cc
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-struct DepthToSpaceVariationParam
-{
- TestCaseData tcd;
- circle::TensorType type = circle::TensorType::TensorType_FLOAT32;
- float scale = 0.0f;
- int64_t zero_point = 0;
-};
-
-class DepthToSpaceVariation : public GenModelTest,
- public ::testing::WithParamInterface<DepthToSpaceVariationParam>
-{
-};
-
-// Input shape: {1, 1, 2, 4}
-// Block size: 2
-// Output shape: {1, 2, 4, 1}
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, DepthToSpaceVariation,
- ::testing::Values(
- // Float
- DepthToSpaceVariationParam{
- uniformTCD<float>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}})},
- // Int32
- DepthToSpaceVariationParam{
- uniformTCD<int32_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
- circle::TensorType::TensorType_INT32},
- // Int64
- DepthToSpaceVariationParam{
- uniformTCD<int64_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
- circle::TensorType::TensorType_INT64},
- // Uint8
- DepthToSpaceVariationParam{
- uniformTCD<uint8_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
- circle::TensorType::TensorType_UINT8, 1.0f, -2},
- // Int8
- DepthToSpaceVariationParam{
- uniformTCD<int8_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
- circle::TensorType::TensorType_INT8, 1.0f, -2}));
-
-TEST_P(DepthToSpaceVariation, Test)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- int in = cgen.addTensor({{1, 1, 2, 4}, param.type}, param.scale, param.zero_point);
- int out = cgen.addTensor({{1, 2, 4, 1}, param.type}, param.scale, param.zero_point);
- cgen.addOperatorDepthToSpace({{in}, {out}}, 2);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(param.tcd);
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_P(DepthToSpaceVariation, neg_Blocksize)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- int in = cgen.addTensor({{1, 1, 2, 4}, param.type}, param.scale, param.zero_point);
- int out = cgen.addTensor({{1, 2, 4, 1}, param.type}, param.scale, param.zero_point);
- cgen.addOperatorDepthToSpace({{in}, {out}}, -2);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/DepthToSpace.test.cc b/tests/nnfw_api/src/one_op_tests/DepthToSpace.test.cc
new file mode 100644
index 000000000..ad2272996
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/DepthToSpace.test.cc
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct DepthToSpaceVariationParam
+{
+ TestCaseData tcd;
+ circle::TensorType type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+};
+
+class DepthToSpaceVariation : public GenModelTest,
+ public ::testing::WithParamInterface<DepthToSpaceVariationParam>
+{
+};
+
+// Input shape: {1, 1, 2, 4}
+// Block size: 2
+// Output shape: {1, 2, 4, 1}
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, DepthToSpaceVariation,
+ ::testing::Values(
+ // Float
+ DepthToSpaceVariationParam{
+ uniformTCD<float>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}})},
+ // Int32
+ DepthToSpaceVariationParam{
+ uniformTCD<int32_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
+ circle::TensorType::TensorType_INT32},
+ // Int64
+ DepthToSpaceVariationParam{
+ uniformTCD<int64_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
+ circle::TensorType::TensorType_INT64},
+ // Uint8
+ DepthToSpaceVariationParam{
+ uniformTCD<uint8_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
+ circle::TensorType::TensorType_UINT8, 1.0f, -2},
+ // Int8
+ DepthToSpaceVariationParam{
+ uniformTCD<int8_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
+ circle::TensorType::TensorType_INT8, 1.0f, -2}));
+
+TEST_P(DepthToSpaceVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 1, 2, 4}, param.type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 2, 4, 1}, param.type}, param.scale, param.zero_point);
+ cgen.addOperatorDepthToSpace({{in}, {out}}, 2);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_P(DepthToSpaceVariation, neg_Blocksize)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 1, 2, 4}, param.type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 2, 4, 1}, param.type}, param.scale, param.zero_point);
+ cgen.addOperatorDepthToSpace({{in}, {out}}, -2);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc b/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc
deleted file mode 100644
index a0bdbf9e6..000000000
--- a/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc
+++ /dev/null
@@ -1,457 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-TEST_F(GenModelTest, OneOp_DepthwiseConv2D)
-{
- CircleGen cgen;
- std::vector<float> weight_data{1, 2, 3, 4, -9, 10, -11, 12, 5, 6, 7, 8, 13, -14, 15, -16};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{1, 2, 3, 4};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 3, 2, 2}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}},
- {{71, -34, 99, -20, 91, -26, 127, -4}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_DepthwiseConv2D_No_Multiplier)
-{
- CircleGen cgen;
- std::vector<float> weight_data{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{0.5f, -0.5f};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{1, 3, 1, 2}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 1, 1, 1,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- uniformTCD<float>({{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}},
- {{16.5f, 27.5f, 28.5f, 43.5f, 8.5f, 15.5f, 12.5f, 23.5f}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_DepthwiseConv2D_No_Multiplier_RELU6)
-{
- CircleGen cgen;
- std::vector<float> weight_data{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{0.5f, -0.5f};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{1, 3, 1, 2}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 1, 1, 1,
- circle::ActivationFunctionType_RELU6);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}},
- {{6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_DepthwiseConv2D_3x3)
-{
- CircleGen cgen;
- std::vector<float> weight_data{0.0f, 1.0f, 1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f, 1.0f,
- 1.0f, 1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f, 0.0f, 1.0f};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{0.0f, 0.0f};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 1, 1, 1,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- uniformTCD<float>({{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}},
- {{6.0f, 16.0f, 8.0f, 16.0f, 10.0f, 16.0f, 12.0f, 16.0f}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_DepthwiseConv2D_Dilation)
-{
- CircleGen cgen;
- std::vector<float> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{0, 0, 0, 0};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 4, 4, 2}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
- circle::ActivationFunctionType_NONE, 2, 2);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
- 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- }},
- {{13, 14, 0, 0, 0, 0, 11, 12, 5, 6, 0, 0, 0, 0, 3, 4}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_DepthwiseConv2D_Dilation_N_Stride)
-{
- CircleGen cgen;
- std::vector<float> weight_data{1, 2, 3, 4};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{0, 0, 0, 0};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 6, 6, 1}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 2, 2, 1,
- circle::ActivationFunctionType_NONE, 3, 3);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
- 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}},
- {{4, 0, 3, 0, 0, 0, 2, 0, 1}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack", "gpu_cl"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_Stride)
-{
- CircleGen cgen;
- std::vector<float> weight_data{1, 2, 3, 4, -9, 10, -11, 12, 5, 6, 7, 8, 13, -14, 15, -16};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{1, 2, 3, 4};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 3, 2, 2}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 0, 0, 2,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_Dilation)
-{
- CircleGen cgen;
- std::vector<float> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{0, 0, 0, 0};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 4, 4, 2}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
- circle::ActivationFunctionType_NONE, 0, 0);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_Type)
-{
- CircleGen cgen;
- std::vector<float> weight_data{1, 2, 3, 4, -9, 10, -11, 12, 5, 6, 7, 8, 13, -14, 15, -16};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<float> bias_data{1, 2, 3, 4};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 3, 2, 2}, circle::TensorType::TensorType_FLOAT32});
- int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
- int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
- int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_UINT8});
- cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-// Generate a model for negative test cases
-CircleBuffer genNegTestDepthwiseConv2DModel(circle::Padding padding, int stride_w, int stride_h,
- int depth_multiplier,
- circle::ActivationFunctionType actfn)
-{
- CircleGen cgen;
- uint32_t ker_buf = cgen.addBuffer(std::vector<uint8_t>{0, 1, 2, 3, 0, 1, 2, 3});
- uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>{0, 0});
- int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType_UINT8}, 0.5, 0);
- int ker = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType_UINT8, ker_buf}, 0.5, 0);
- int bias = cgen.addTensor({{2}, circle::TensorType_INT32, bias_buf}, 0.25, 0);
- int out = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType_UINT8}, 1, 0);
- cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, padding, stride_w, stride_h,
- depth_multiplier, actfn, 0, 0);
- cgen.setInputsAndOutputs({in}, {out});
- return cgen.finish();
-}
-
-template <typename T> struct DepthwiseConv2DQuantTestParam
-{
- int stride = 1; // Used for both height and width
- int input_depth = 1;
- int depth_multiplier = 1;
- std::vector<T> ref_output;
-};
-
-template <typename T>
-class DepthwiseConv2DQuantTest
- : public GenModelTest,
- public ::testing::WithParamInterface<DepthwiseConv2DQuantTestParam<T>>
-{
-};
-
-using DepthwiseConv2DQuantTestParamU8 = DepthwiseConv2DQuantTestParam<uint8_t>;
-using DepthwiseConv2DQuantTestU8 = DepthwiseConv2DQuantTest<uint8_t>;
-
-// Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU
-// kernels.
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, DepthwiseConv2DQuantTestU8,
- ::testing::Values(
- // Stride == 1
- DepthwiseConv2DQuantTestParamU8{1, 8, 1, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
- DepthwiseConv2DQuantTestParamU8{1, 4, 2, std::vector<uint8_t>{0, 0, 2, 3, 0, 2, 6, 9}},
- DepthwiseConv2DQuantTestParamU8{
- 1, 2, 8, std::vector<uint8_t>{0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 4, 6, 0, 2, 4, 6}},
- DepthwiseConv2DQuantTestParamU8{1, 2, 2, std::vector<uint8_t>{0, 1, 4, 6}},
- DepthwiseConv2DQuantTestParamU8{1, 2, 1, std::vector<uint8_t>{2, 5}},
- DepthwiseConv2DQuantTestParamU8{1, 1, 2, std::vector<uint8_t>{2, 4}},
- DepthwiseConv2DQuantTestParamU8{1, 1, 4, std::vector<uint8_t>{0, 2, 3, 5}},
- DepthwiseConv2DQuantTestParamU8{1, 4, 1, std::vector<uint8_t>{0, 1, 4, 9}},
- DepthwiseConv2DQuantTestParamU8{
- 1, 4, 4, std::vector<uint8_t>{0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9}},
- DepthwiseConv2DQuantTestParamU8{1, 12, 1,
- std::vector<uint8_t>{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}},
- // Stride == 2
- DepthwiseConv2DQuantTestParamU8{2, 4, 1, std::vector<uint8_t>{0, 1, 4, 9}},
- DepthwiseConv2DQuantTestParamU8{2, 2, 1, std::vector<uint8_t>{2, 5}},
- DepthwiseConv2DQuantTestParamU8{2, 1, 8, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DQuantTestParamU8{2, 1, 32, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3,
- 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2,
- 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DQuantTestParamU8{
- 2, 1, 20, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DQuantTestParamU8{
- 2, 1, 16, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DQuantTestParamU8{2, 8, 1, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
- DepthwiseConv2DQuantTestParamU8{
- 2, 8, 2, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8}},
- DepthwiseConv2DQuantTestParamU8{
- 2, 16, 1, std::vector<uint8_t>{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}}));
-
-CircleBuffer genDepthwiseConv2DQuantU8Model(int stride, int input_depth, int depth_multiplier)
-{
- assert(1 <= stride && stride <= 2);
- assert(1 <= input_depth && input_depth <= 16);
- assert(1 <= depth_multiplier && depth_multiplier <= 32);
-
- const int output_depth = input_depth * depth_multiplier;
- assert(1 <= output_depth && output_depth <= 32);
-
- CircleGen cgen;
- uint32_t ker_buf = cgen.addBuffer(std::vector<uint8_t>{
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
- 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
- 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
- uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>(output_depth, 0));
- int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_UINT8}, 0.5, 0);
- int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_UINT8, ker_buf}, 0.5, 0);
- int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0);
- int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_UINT8}, 1, 0);
- cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride,
- stride, depth_multiplier, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
- return cgen.finish();
-}
-
-TEST_P(DepthwiseConv2DQuantTestU8, Test)
-{
- // Same input is used for all tests but output differs
- static const std::vector<uint8_t> input64{
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2,
- 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2};
-
- auto &param = GetParam();
- _context = std::make_unique<GenModelTestContext>(
- genDepthwiseConv2DQuantU8Model(param.stride, param.input_depth, param.depth_multiplier));
- std::vector<uint8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4);
- _context->addTestCase(uniformTCD<uint8_t>({ref_input}, {param.ref_output}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-using DepthwiseConv2DQuantTestParamI8 = DepthwiseConv2DQuantTestParam<int8_t>;
-using DepthwiseConv2DQuantTestI8 = DepthwiseConv2DQuantTest<int8_t>;
-
-// Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU
-// kernels.
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, DepthwiseConv2DQuantTestI8,
- ::testing::Values(
- // Stride == 1
- DepthwiseConv2DQuantTestParamI8{1, 8, 1, std::vector<int8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
- DepthwiseConv2DQuantTestParamI8{1, 4, 2, std::vector<int8_t>{0, 0, 2, 3, 0, 2, 6, 9}},
- DepthwiseConv2DQuantTestParamI8{
- 1, 2, 8, std::vector<int8_t>{0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 4, 6, 0, 2, 4, 6}},
- DepthwiseConv2DQuantTestParamI8{1, 2, 2, std::vector<int8_t>{0, 1, 4, 6}},
- DepthwiseConv2DQuantTestParamI8{1, 2, 1, std::vector<int8_t>{2, 5}},
- DepthwiseConv2DQuantTestParamI8{1, 1, 2, std::vector<int8_t>{2, 4}},
- DepthwiseConv2DQuantTestParamI8{1, 1, 4, std::vector<int8_t>{0, 2, 3, 5}},
- DepthwiseConv2DQuantTestParamI8{1, 4, 1, std::vector<int8_t>{0, 1, 4, 9}},
- DepthwiseConv2DQuantTestParamI8{
- 1, 4, 4, std::vector<int8_t>{0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9}},
- DepthwiseConv2DQuantTestParamI8{1, 12, 1,
- std::vector<int8_t>{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}},
- // Stride == 2
- DepthwiseConv2DQuantTestParamI8{2, 4, 1, std::vector<int8_t>{0, 1, 4, 9}},
- DepthwiseConv2DQuantTestParamI8{2, 2, 1, std::vector<int8_t>{2, 5}},
- DepthwiseConv2DQuantTestParamI8{2, 1, 8, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DQuantTestParamI8{2, 1, 32, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3,
- 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2,
- 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DQuantTestParamI8{
- 2, 1, 20, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DQuantTestParamI8{
- 2, 1, 16, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
- DepthwiseConv2DQuantTestParamI8{2, 8, 1, std::vector<int8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
- DepthwiseConv2DQuantTestParamI8{
- 2, 8, 2, std::vector<int8_t>{0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8}},
- DepthwiseConv2DQuantTestParamI8{
- 2, 16, 1, std::vector<int8_t>{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}}));
-
-CircleBuffer genDepthwiseConv2DQuantI8Model(int stride, int input_depth, int depth_multiplier)
-{
- assert(1 <= stride && stride <= 2);
- assert(1 <= input_depth && input_depth <= 16);
- assert(1 <= depth_multiplier && depth_multiplier <= 32);
-
- const int output_depth = input_depth * depth_multiplier;
- assert(1 <= output_depth && output_depth <= 32);
-
- CircleGen cgen;
- uint32_t ker_buf = cgen.addBuffer(std::vector<int8_t>{
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
- 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
- 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
- uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>(output_depth, 0));
- int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_INT8}, 0.5, 0);
- int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_INT8, ker_buf}, 0.5, 0);
- int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0);
- int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_INT8}, 1, 0);
- cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride,
- stride, depth_multiplier, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
- return cgen.finish();
-}
-
-TEST_P(DepthwiseConv2DQuantTestI8, Test)
-{
- // Same input is used for all tests but output differs
- static const std::vector<int8_t> input64{
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2,
- 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2};
-
- auto &param = GetParam();
- _context = std::make_unique<GenModelTestContext>(
- genDepthwiseConv2DQuantI8Model(param.stride, param.input_depth, param.depth_multiplier));
- std::vector<int8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4);
- _context->addTestCase(uniformTCD<int8_t>({ref_input}, {param.ref_output}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_InvalidPaddingType)
-{
- _context = std::make_unique<GenModelTestContext>(genNegTestDepthwiseConv2DModel(
- static_cast<circle::Padding>(99), 1, 1, 1, circle::ActivationFunctionType_NONE));
- _context->expectFailModelLoad();
- _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"});
-
- SUCCEED();
-}
-
-// TODO add other invalid operation tests like above
-
-TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_I8_NonZero_ZeroPoints)
-{
- CircleGen cgen;
- std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8};
- uint32_t weight_buf = cgen.addBuffer(weight_data);
- std::vector<int32_t> bias_data{0, 2};
- uint32_t bias_buf = cgen.addBuffer(bias_data);
- int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_INT8}, 0.5, 0);
- std::vector<float> weight_scales = {0.5, 1};
- std::vector<int64_t> weight_zeropoints = {0, 10};
- int weight = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_INT8, weight_buf},
- weight_scales, weight_zeropoints);
- int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_INT32, bias_buf});
- int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}, 1.0, 0);
- cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.test.cc b/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.test.cc
new file mode 100644
index 000000000..f82d988d5
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.test.cc
@@ -0,0 +1,502 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_DepthwiseConv2D)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{1, 2, 3, 4, -9, 10, -11, 12, 5, 6, 7, 8, 13, -14, 15, -16};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{1, 2, 3, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 3, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}},
+ {{71, -34, 99, -20, 91, -26, 127, -4}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_DepthwiseConv2D_No_Multiplier)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{0.5f, -0.5f};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 3, 1, 2}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 1, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ uniformTCD<float>({{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}},
+ {{16.5f, 27.5f, 28.5f, 43.5f, 8.5f, 15.5f, 12.5f, 23.5f}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_DepthwiseConv2D_No_Multiplier_RELU6)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{0.5f, -0.5f};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 3, 1, 2}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 1, 1, 1,
+ circle::ActivationFunctionType_RELU6);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}},
+ {{6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_DepthwiseConv2D_3x3)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{0.0f, 1.0f, 1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f, 1.0f,
+ 1.0f, 1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f, 0.0f, 1.0f};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{0.0f, 0.0f};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 1, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ uniformTCD<float>({{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}},
+ {{6.0f, 16.0f, 8.0f, 16.0f, 10.0f, 16.0f, 12.0f, 16.0f}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "gpu_cl"});
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_DepthwiseConv2D_Dilation)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{0, 0, 0, 0};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 4, 4, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
+ circle::ActivationFunctionType_NONE, 2, 2);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
+ 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ }},
+ {{13, 14, 0, 0, 0, 0, 11, 12, 5, 6, 0, 0, 0, 0, 3, 4}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_DepthwiseConv2D_Dilation_N_Stride)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{1, 2, 3, 4};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{0, 0, 0, 0};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 6, 6, 1}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 3, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_SAME, 2, 2, 1,
+ circle::ActivationFunctionType_NONE, 3, 3);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
+ 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}},
+ {{4, 0, 3, 0, 0, 0, 2, 0, 1}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack", "gpu_cl"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_DepthwiseConv2D_U8_PerChannel)
+{
+ CircleGen cgen;
+ // weight
+ // clang-format off
+ std::vector<uint8_t> weight_data{2, 1, 2,
+ 6, 2, 3,
+ 2, 3, 4,
+ 4, 4, 5};
+ // clang-format on
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> weight_scales = {.5, 1, 2};
+ std::vector<int64_t> weight_zeropoints = {2, 0, 1};
+ int weight = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_UINT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ // bias
+ std::vector<int32_t> bias_data{4, -8, -4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1., 0);
+
+ // in and out
+ int in = cgen.addTensor({{1, 2, 2, 3}, circle::TensorType::TensorType_UINT8}, 2., 1);
+ int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_UINT8}, 4., 2);
+
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 1,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ // clang-format off
+ _context->addTestCase(uniformTCD<uint8_t>({{5, 5, 5, // NHWC
+ 3, 3, 3,
+ 7, 7, 7,
+ 9, 9, 9}
+ },
+ {{9,
+ 27,
+ 56}
+ }));
+ // clang-format on
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_Stride)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{1, 2, 3, 4, -9, 10, -11, 12, 5, 6, 7, 8, 13, -14, 15, -16};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{1, 2, 3, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 3, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 0, 0, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_Dilation)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{0, 0, 0, 0};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 4, 4, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
+ circle::ActivationFunctionType_NONE, 0, 0);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_Type)
+{
+ CircleGen cgen;
+ std::vector<float> weight_data{1, 2, 3, 4, -9, 10, -11, 12, 5, 6, 7, 8, 13, -14, 15, -16};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<float> bias_data{1, 2, 3, 4};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 3, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int weight = cgen.addTensor({{1, 2, 2, 4}, circle::TensorType::TensorType_FLOAT32, weight_buf});
+ int bias = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_UINT8});
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+// Generate a model for negative test cases
+CircleBuffer genNegTestDepthwiseConv2DModel(circle::Padding padding, int stride_w, int stride_h,
+ int depth_multiplier,
+ circle::ActivationFunctionType actfn)
+{
+ CircleGen cgen;
+ uint32_t ker_buf = cgen.addBuffer(std::vector<uint8_t>{0, 1, 2, 3, 0, 1, 2, 3});
+ uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>{0, 0});
+ int in = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType_UINT8}, 0.5, 0);
+ int ker = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType_UINT8, ker_buf}, 0.5, 0);
+ int bias = cgen.addTensor({{2}, circle::TensorType_INT32, bias_buf}, 0.25, 0);
+ int out = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType_UINT8}, 1, 0);
+ cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, padding, stride_w, stride_h,
+ depth_multiplier, actfn, 0, 0);
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen.finish();
+}
+
+template <typename T> struct DepthwiseConv2DQuantTestParam
+{
+ int stride = 1; // Used for both height and width
+ int input_depth = 1;
+ int depth_multiplier = 1;
+ std::vector<T> ref_output;
+};
+
+template <typename T>
+class DepthwiseConv2DQuantTest
+ : public GenModelTest,
+ public ::testing::WithParamInterface<DepthwiseConv2DQuantTestParam<T>>
+{
+};
+
+using DepthwiseConv2DQuantTestParamU8 = DepthwiseConv2DQuantTestParam<uint8_t>;
+using DepthwiseConv2DQuantTestU8 = DepthwiseConv2DQuantTest<uint8_t>;
+
+// Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU
+// kernels.
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, DepthwiseConv2DQuantTestU8,
+ ::testing::Values(
+ // Stride == 1
+ DepthwiseConv2DQuantTestParamU8{1, 8, 1, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamU8{1, 4, 2, std::vector<uint8_t>{0, 0, 2, 3, 0, 2, 6, 9}},
+ DepthwiseConv2DQuantTestParamU8{
+ 1, 2, 8, std::vector<uint8_t>{0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 4, 6, 0, 2, 4, 6}},
+ DepthwiseConv2DQuantTestParamU8{1, 2, 2, std::vector<uint8_t>{0, 1, 4, 6}},
+ DepthwiseConv2DQuantTestParamU8{1, 2, 1, std::vector<uint8_t>{2, 5}},
+ DepthwiseConv2DQuantTestParamU8{1, 1, 2, std::vector<uint8_t>{2, 4}},
+ DepthwiseConv2DQuantTestParamU8{1, 1, 4, std::vector<uint8_t>{0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamU8{1, 4, 1, std::vector<uint8_t>{0, 1, 4, 9}},
+ DepthwiseConv2DQuantTestParamU8{
+ 1, 4, 4, std::vector<uint8_t>{0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9}},
+ DepthwiseConv2DQuantTestParamU8{1, 12, 1,
+ std::vector<uint8_t>{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}},
+ // Stride == 2
+ DepthwiseConv2DQuantTestParamU8{2, 4, 1, std::vector<uint8_t>{0, 1, 4, 9}},
+ DepthwiseConv2DQuantTestParamU8{2, 2, 1, std::vector<uint8_t>{2, 5}},
+ DepthwiseConv2DQuantTestParamU8{2, 1, 8, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamU8{2, 1, 32, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3,
+ 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2,
+ 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamU8{
+ 2, 1, 20, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamU8{
+ 2, 1, 16, std::vector<uint8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamU8{2, 8, 1, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamU8{
+ 2, 8, 2, std::vector<uint8_t>{0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamU8{
+ 2, 16, 1, std::vector<uint8_t>{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}}));
+
+CircleBuffer genDepthwiseConv2DQuantU8Model(int stride, int input_depth, int depth_multiplier)
+{
+ assert(1 <= stride && stride <= 2);
+ assert(1 <= input_depth && input_depth <= 16);
+ assert(1 <= depth_multiplier && depth_multiplier <= 32);
+
+ const int output_depth = input_depth * depth_multiplier;
+ assert(1 <= output_depth && output_depth <= 32);
+
+ CircleGen cgen;
+ uint32_t ker_buf = cgen.addBuffer(std::vector<uint8_t>{
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
+ 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
+ 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
+ uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>(output_depth, 0));
+ int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_UINT8}, 0.5, 0);
+ int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_UINT8, ker_buf}, 0.5, 0);
+ int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0);
+ int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_UINT8}, 1, 0);
+ cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride,
+ stride, depth_multiplier, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen.finish();
+}
+
+TEST_P(DepthwiseConv2DQuantTestU8, Test)
+{
+ // Same input is used for all tests but output differs
+ static const std::vector<uint8_t> input64{
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2,
+ 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2};
+
+ auto &param = GetParam();
+ _context = std::make_unique<GenModelTestContext>(
+ genDepthwiseConv2DQuantU8Model(param.stride, param.input_depth, param.depth_multiplier));
+ std::vector<uint8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4);
+ _context->addTestCase(uniformTCD<uint8_t>({ref_input}, {param.ref_output}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+using DepthwiseConv2DQuantTestParamI8 = DepthwiseConv2DQuantTestParam<int8_t>;
+using DepthwiseConv2DQuantTestI8 = DepthwiseConv2DQuantTest<int8_t>;
+
+// Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU
+// kernels.
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, DepthwiseConv2DQuantTestI8,
+ ::testing::Values(
+ // Stride == 1
+ DepthwiseConv2DQuantTestParamI8{1, 8, 1, std::vector<int8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamI8{1, 4, 2, std::vector<int8_t>{0, 0, 2, 3, 0, 2, 6, 9}},
+ DepthwiseConv2DQuantTestParamI8{
+ 1, 2, 8, std::vector<int8_t>{0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 4, 6, 0, 2, 4, 6}},
+ DepthwiseConv2DQuantTestParamI8{1, 2, 2, std::vector<int8_t>{0, 1, 4, 6}},
+ DepthwiseConv2DQuantTestParamI8{1, 2, 1, std::vector<int8_t>{2, 5}},
+ DepthwiseConv2DQuantTestParamI8{1, 1, 2, std::vector<int8_t>{2, 4}},
+ DepthwiseConv2DQuantTestParamI8{1, 1, 4, std::vector<int8_t>{0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{1, 4, 1, std::vector<int8_t>{0, 1, 4, 9}},
+ DepthwiseConv2DQuantTestParamI8{
+ 1, 4, 4, std::vector<int8_t>{0, 0, 0, 0, 0, 1, 2, 3, 0, 2, 4, 6, 0, 3, 6, 9}},
+ DepthwiseConv2DQuantTestParamI8{1, 12, 1,
+ std::vector<int8_t>{0, 3, 7, 12, 0, 4, 7, 12, 0, 4, 9, 16}},
+ // Stride == 2
+ DepthwiseConv2DQuantTestParamI8{2, 4, 1, std::vector<int8_t>{0, 1, 4, 9}},
+ DepthwiseConv2DQuantTestParamI8{2, 2, 1, std::vector<int8_t>{2, 5}},
+ DepthwiseConv2DQuantTestParamI8{2, 1, 8, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{2, 1, 32, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3,
+ 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2,
+ 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{
+ 2, 1, 20, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{
+ 2, 1, 16, std::vector<int8_t>{0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5, 0, 2, 3, 5}},
+ DepthwiseConv2DQuantTestParamI8{2, 8, 1, std::vector<int8_t>{0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamI8{
+ 2, 8, 2, std::vector<int8_t>{0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8, 0, 3, 5, 8}},
+ DepthwiseConv2DQuantTestParamI8{
+ 2, 16, 1, std::vector<int8_t>{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}}));
+
+CircleBuffer genDepthwiseConv2DQuantI8Model(int stride, int input_depth, int depth_multiplier)
+{
+ assert(1 <= stride && stride <= 2);
+ assert(1 <= input_depth && input_depth <= 16);
+ assert(1 <= depth_multiplier && depth_multiplier <= 32);
+
+ const int output_depth = input_depth * depth_multiplier;
+ assert(1 <= output_depth && output_depth <= 32);
+
+ CircleGen cgen;
+ uint32_t ker_buf = cgen.addBuffer(std::vector<int8_t>{
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
+ 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
+ 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
+ uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>(output_depth, 0));
+ int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_INT8}, 0.5, 0);
+ int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_INT8, ker_buf}, 0.5, 0);
+ int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0);
+ int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_INT8}, 1, 0);
+ cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride,
+ stride, depth_multiplier, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen.finish();
+}
+
+TEST_P(DepthwiseConv2DQuantTestI8, Test)
+{
+ // Same input is used for all tests but output differs
+ static const std::vector<int8_t> input64{
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2,
+ 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2};
+
+ auto &param = GetParam();
+ _context = std::make_unique<GenModelTestContext>(
+ genDepthwiseConv2DQuantI8Model(param.stride, param.input_depth, param.depth_multiplier));
+ std::vector<int8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4);
+ _context->addTestCase(uniformTCD<int8_t>({ref_input}, {param.ref_output}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_InvalidPaddingType)
+{
+ _context = std::make_unique<GenModelTestContext>(genNegTestDepthwiseConv2DModel(
+ static_cast<circle::Padding>(99), 1, 1, 1, circle::ActivationFunctionType_NONE));
+ _context->expectFailModelLoad();
+ _context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"});
+
+ SUCCEED();
+}
+
+// TODO add other invalid operation tests like above
+
+TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_I8_NonZero_ZeroPoints)
+{
+ CircleGen cgen;
+ std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8};
+ uint32_t weight_buf = cgen.addBuffer(weight_data);
+ std::vector<int32_t> bias_data{0, 2};
+ uint32_t bias_buf = cgen.addBuffer(bias_data);
+ int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_INT8}, 0.5, 0);
+ std::vector<float> weight_scales = {0.5, 1};
+ std::vector<int64_t> weight_zeropoints = {0, 10};
+ int weight = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_INT8, weight_buf},
+ weight_scales, weight_zeropoints);
+ int bias = cgen.addTensor({{1, 1, 1, 2}, circle::TensorType::TensorType_INT32, bias_buf});
+ int out = cgen.addTensor({{1, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32}, 1.0, 0);
+ cgen.addOperatorDepthwiseConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1, 2,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/DetectionPostProcess.cc b/tests/nnfw_api/src/one_op_tests/DetectionPostProcess.test.cc
index 188638bbb..188638bbb 100644
--- a/tests/nnfw_api/src/one_op_tests/DetectionPostProcess.cc
+++ b/tests/nnfw_api/src/one_op_tests/DetectionPostProcess.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Elu.cc b/tests/nnfw_api/src/one_op_tests/Elu.test.cc
index a037070b2..a037070b2 100644
--- a/tests/nnfw_api/src/one_op_tests/Elu.cc
+++ b/tests/nnfw_api/src/one_op_tests/Elu.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Equal.cc b/tests/nnfw_api/src/one_op_tests/Equal.test.cc
index da890978e..da890978e 100644
--- a/tests/nnfw_api/src/one_op_tests/Equal.cc
+++ b/tests/nnfw_api/src/one_op_tests/Equal.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/ExpandDims.cc b/tests/nnfw_api/src/one_op_tests/ExpandDims.test.cc
index 280cf7344..280cf7344 100644
--- a/tests/nnfw_api/src/one_op_tests/ExpandDims.cc
+++ b/tests/nnfw_api/src/one_op_tests/ExpandDims.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Fill.cc b/tests/nnfw_api/src/one_op_tests/Fill.cc
deleted file mode 100644
index 4d5e4d8be..000000000
--- a/tests/nnfw_api/src/one_op_tests/Fill.cc
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-struct FillVariationParam
-{
- TestCaseData tcd;
- const uint8_t *value_data = nullptr;
- circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
-};
-
-class FillVariation : public GenModelTest, public ::testing::WithParamInterface<FillVariationParam>
-{
-};
-
-// value is constant
-TEST_P(FillVariation, Test)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
-
- size_t value_size =
- (param.data_type == circle::TensorType::TensorType_INT64) ? sizeof(int64_t) : sizeof(int32_t);
- uint32_t value_buf = cgen.addBuffer(param.value_data, value_size);
-
- int dims = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
- int value = cgen.addTensor({{1}, param.data_type, value_buf});
- int out = cgen.addTensor({{2, 3}, param.data_type});
- cgen.addOperatorFill({{dims, value}, {out}});
- cgen.setInputsAndOutputs({dims}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(param.tcd);
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-const int32_t test_int32 = 13;
-const int64_t test_int64 = 1052;
-const float test_float = 5.2;
-
-// Test with different value type
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, FillVariation,
- ::testing::Values(
- // float value
- FillVariationParam{
- TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<float>({5.2, 5.2, 5.2, 5.2, 5.2, 5.2}),
- reinterpret_cast<const uint8_t *>(&test_float)},
- // int32 value
- FillVariationParam{
- TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int32_t>({13, 13, 13, 13, 13, 13}),
- reinterpret_cast<const uint8_t *>(&test_int32), circle::TensorType::TensorType_INT32},
- // uint8 value
- FillVariationParam{
- TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int64_t>({1052, 1052, 1052, 1052, 1052,
- 1052}),
- reinterpret_cast<const uint8_t *>(&test_int64), circle::TensorType::TensorType_INT64}));
-
-TEST_F(GenModelTest, OneOp_Fill_Int64_Shape)
-{
- CircleGen cgen;
- std::vector<float> value_data{1.3};
- uint32_t value_buf = cgen.addBuffer(value_data);
-
- int dims = cgen.addTensor({{2}, circle::TensorType::TensorType_INT64});
- int value = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, value_buf});
- int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorFill({{dims, value}, {out}});
- cgen.setInputsAndOutputs({dims}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- TestCaseData{}.addInput<int64_t>({2, 3}).addOutput<float>({1.3, 1.3, 1.3, 1.3, 1.3, 1.3}));
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Fill_Int32_oneoperand)
-{
- CircleGen cgen;
-
- int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
- int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_INT32});
- cgen.addOperatorFill({{in}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int32_t>({13, 13, 13, 13, 13, 13}));
- _context->setBackends({"cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Fill_Int64_oneoperand)
-{
- CircleGen cgen;
-
- int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
- int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_INT64});
- cgen.addOperatorFill({{in}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int64_t>({13, 13, 13, 13, 13, 13}));
- _context->setBackends({"cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Fill_Float32_oneoperand)
-{
- CircleGen cgen;
-
- int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
- int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorFill({{in}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(
- TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<float>({1.3, 1.3, 1.3, 1.3, 1.3, 1.3}));
- _context->setBackends({"cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/Fill.test.cc b/tests/nnfw_api/src/one_op_tests/Fill.test.cc
new file mode 100644
index 000000000..0d34056b3
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Fill.test.cc
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct FillVariationParam
+{
+ TestCaseData tcd;
+ const uint8_t *value_data = nullptr;
+ circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
+};
+
+class FillVariation : public GenModelTest, public ::testing::WithParamInterface<FillVariationParam>
+{
+};
+
+// value is constant
+TEST_P(FillVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+
+ size_t value_size =
+ (param.data_type == circle::TensorType::TensorType_INT64) ? sizeof(int64_t) : sizeof(int32_t);
+ uint32_t value_buf = cgen.addBuffer(param.value_data, value_size);
+
+ int dims = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ int value = cgen.addTensor({{1}, param.data_type, value_buf});
+ int out = cgen.addTensor({{2, 3}, param.data_type});
+ cgen.addOperatorFill({{dims, value}, {out}});
+ cgen.setInputsAndOutputs({dims}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+const int32_t test_int32 = 13;
+const int64_t test_int64 = 1052;
+const float test_float = 5.2;
+
+// Test with different value type
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, FillVariation,
+ ::testing::Values(
+ // float value
+ FillVariationParam{
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<float>({5.2, 5.2, 5.2, 5.2, 5.2, 5.2}),
+ reinterpret_cast<const uint8_t *>(&test_float)},
+ // int32 value
+ FillVariationParam{
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int32_t>({13, 13, 13, 13, 13, 13}),
+ reinterpret_cast<const uint8_t *>(&test_int32), circle::TensorType::TensorType_INT32},
+    // int64 value
+ FillVariationParam{
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int64_t>({1052, 1052, 1052, 1052, 1052,
+ 1052}),
+ reinterpret_cast<const uint8_t *>(&test_int64), circle::TensorType::TensorType_INT64}));
+
+TEST_F(GenModelTest, OneOp_Fill_Int64_Shape)
+{
+ CircleGen cgen;
+ std::vector<float> value_data{1.3};
+ uint32_t value_buf = cgen.addBuffer(value_data);
+
+ int dims = cgen.addTensor({{2}, circle::TensorType::TensorType_INT64});
+ int value = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, value_buf});
+ int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorFill({{dims, value}, {out}});
+ cgen.setInputsAndOutputs({dims}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<int64_t>({2, 3}).addOutput<float>({1.3, 1.3, 1.3, 1.3, 1.3, 1.3}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Fill_Int32_oneoperand)
+{
+ CircleGen cgen;
+
+ int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorFill({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int32_t>({13, 13, 13, 13, 13, 13}));
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Fill_Int64_oneoperand)
+{
+ CircleGen cgen;
+
+ int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_INT64});
+ cgen.addOperatorFill({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<int64_t>({13, 13, 13, 13, 13, 13}));
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Fill_Float32_oneoperand)
+{
+ CircleGen cgen;
+
+ int in = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorFill({{in}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(
+ TestCaseData{}.addInput<int32_t>({2, 3}).addOutput<float>({1.3, 1.3, 1.3, 1.3, 1.3, 1.3}));
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Floor.cc b/tests/nnfw_api/src/one_op_tests/Floor.test.cc
index dcb402027..dcb402027 100644
--- a/tests/nnfw_api/src/one_op_tests/Floor.cc
+++ b/tests/nnfw_api/src/one_op_tests/Floor.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/FloorDiv.cc b/tests/nnfw_api/src/one_op_tests/FloorDiv.test.cc
index edbca8504..edbca8504 100644
--- a/tests/nnfw_api/src/one_op_tests/FloorDiv.cc
+++ b/tests/nnfw_api/src/one_op_tests/FloorDiv.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/FullyConnected.cc b/tests/nnfw_api/src/one_op_tests/FullyConnected.test.cc
index 791787f9b..791787f9b 100644
--- a/tests/nnfw_api/src/one_op_tests/FullyConnected.cc
+++ b/tests/nnfw_api/src/one_op_tests/FullyConnected.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Greater.test.cc b/tests/nnfw_api/src/one_op_tests/Greater.test.cc
new file mode 100644
index 000000000..b63075c0e
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Greater.test.cc
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct GreaterVariationParam
+{
+ TestCaseData tcd;
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ const std::vector<std::string> backends = {"acl_cl", "acl_neon", "cpu"};
+};
+
+class GreaterVariation : public GenModelTest,
+ public ::testing::WithParamInterface<GreaterVariationParam>
+{
+};
+
+// Input shape:
+// Base: {1, 2, 2, 1}
+// Broadcast: {1} on one of the two inputs
+// Output shape: {1, 2, 2, 1}
+// Input type: Non-quantization type
+// Output type: BOOL
+// Test with different input type and value
+INSTANTIATE_TEST_SUITE_P(GenModelTest, GreaterVariation,
+ ::testing::Values(
+ // Float type
+ GreaterVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.1, 0.2, 0.3, 0.4})
+ .addOutput<bool>({false, true, false, true})},
+ // Float type - broadcast
+ GreaterVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.3})
+ .addOutput<bool>({false, false, false, true})},
+ // Int32 type
+ GreaterVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({1, 2, 3, 4})
+ .addOutput<bool>({false, true, false, true}),
+ circle::TensorType::TensorType_INT32},
+ // Int32 type - broadcast
+ GreaterVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({5})
+ .addOutput<bool>({false, false, false, true}),
+ circle::TensorType::TensorType_INT32},
+ // Int64 type
+ // NYI: acl backend
+ GreaterVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1, 2, 3, 4})
+ .addOutput<bool>({false, true, false, true}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}},
+ // Int64 type - broadcast
+ // NYI: acl backend
+ GreaterVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1})
+ .addOutput<bool>({false, true, false, true}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}}));
+
+TEST_P(GreaterVariation, Test)
+{
+ auto &param = GetParam();
+
+ auto lhs_data = param.tcd.inputs.at(0);
+ auto rhs_data = param.tcd.inputs.at(1);
+
+ bool broadcast_lhs = false;
+ bool broadcast_rhs = false;
+ if (lhs_data.size() != rhs_data.size())
+ {
+ if (lhs_data.size() < rhs_data.size())
+ broadcast_lhs = true;
+ else
+ broadcast_rhs = true;
+ }
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_BOOL;
+
+ int lhs = broadcast_lhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int rhs = broadcast_rhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int out = cgen.addTensor({{1, 2, 2, 1}, output_type});
+ cgen.addOperatorGreater({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends(param.backends);
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Greater_DifferentType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ cgen.addOperatorGreater({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Greater_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorGreater({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/GreaterEqual.test.cc b/tests/nnfw_api/src/one_op_tests/GreaterEqual.test.cc
new file mode 100644
index 000000000..f824030e0
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/GreaterEqual.test.cc
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct GreaterEqualVariationParam
+{
+ TestCaseData tcd;
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ const std::vector<std::string> backends = {"acl_cl", "acl_neon", "cpu"};
+};
+
+class GreaterEqualVariation : public GenModelTest,
+ public ::testing::WithParamInterface<GreaterEqualVariationParam>
+{
+};
+
+// Input shape:
+// Base: {1, 2, 2, 1}
+// Broadcast: {1} on one of the two inputs
+// Output shape: {1, 2, 2, 1}
+// Input type: Non-quantization type
+// Output type: BOOL
+// Test with different input type and value
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, GreaterEqualVariation,
+ ::testing::Values(
+ // Float type
+ GreaterEqualVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.1, 0.2, 0.3, 0.4})
+ .addOutput<bool>({true, true, false, true})},
+ // Float type - broadcast
+ GreaterEqualVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.3})
+ .addOutput<bool>({false, true, false, true})},
+ // Int32 type
+ GreaterEqualVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({1, 2, 3, 4})
+ .addOutput<bool>({true, true, false, true}),
+ circle::TensorType::TensorType_INT32},
+ // Int32 type - broadcast
+ GreaterEqualVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({5})
+ .addOutput<bool>({false, false, false, true}),
+ circle::TensorType::TensorType_INT32},
+ // Int64 type
+ // NYI: acl backend
+ GreaterEqualVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1, 2, 3, 4})
+ .addOutput<bool>({true, true, false, true}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}},
+ // Int64 type - broadcast
+ // NYI: acl backend
+ GreaterEqualVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1})
+ .addOutput<bool>({true, true, false, true}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}}));
+
+TEST_P(GreaterEqualVariation, Test)
+{
+ auto &param = GetParam();
+
+ auto lhs_data = param.tcd.inputs.at(0);
+ auto rhs_data = param.tcd.inputs.at(1);
+
+ bool broadcast_lhs = false;
+ bool broadcast_rhs = false;
+ if (lhs_data.size() != rhs_data.size())
+ {
+ if (lhs_data.size() < rhs_data.size())
+ broadcast_lhs = true;
+ else
+ broadcast_rhs = true;
+ }
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_BOOL;
+
+ int lhs = broadcast_lhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int rhs = broadcast_rhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int out = cgen.addTensor({{1, 2, 2, 1}, output_type});
+ cgen.addOperatorGreaterEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends(param.backends);
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_GreaterEqual_DifferentType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ cgen.addOperatorGreaterEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_GreaterEqual_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorGreaterEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/If.cc b/tests/nnfw_api/src/one_op_tests/If.cc
deleted file mode 100644
index 4ec294223..000000000
--- a/tests/nnfw_api/src/one_op_tests/If.cc
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-#include <memory>
-
-TEST_F(GenModelTest, OneOp_If)
-{
- // The model looks just like the below pseudocode
- //
- // function model(x)
- // {
- // if (x < 0.0)
- // return -100.0;
- // else
- // return 100.0;
- // }
-
- CircleGen cgen;
-
- // constant buffers
- std::vector<float> comp_data{0.0};
- uint32_t comp_buf = cgen.addBuffer(comp_data);
- std::vector<float> then_data{-100};
- uint32_t then_buf = cgen.addBuffer(then_data);
- std::vector<float> else_data{100};
- uint32_t else_buf = cgen.addBuffer(else_data);
-
- // primary subgraph
- {
- int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int comp = cgen.addTensor({{1}, circle::TensorType_FLOAT32, comp_buf});
- int cond = cgen.addTensor({{1}, circle::TensorType_BOOL});
- cgen.addOperatorLess({{x, comp}, {cond}});
-
- int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- cgen.addOperatorIf({{cond}, {ret}}, 1, 2);
-
- cgen.setInputsAndOutputs({x}, {ret});
- }
-
- // then subgraph
- {
- cgen.nextSubgraph();
- int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, then_buf});
- cgen.setInputsAndOutputs({}, {ret});
- }
-
- // else subgraph
- {
- cgen.nextSubgraph();
- int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, else_buf});
- cgen.setInputsAndOutputs({}, {ret});
- }
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{-1.0}}, {{-100.0}}));
- _context->addTestCase(uniformTCD<float>({{1.0}}, {{100.0}}));
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-class IfWrongSubgraphIndex : public GenModelTest,
- public ::testing::WithParamInterface<std::pair<int, int>>
-{
-};
-
-TEST_P(IfWrongSubgraphIndex, neg_Test)
-{
- // These values must be less than 0 or greater than 2
- int then_subg = GetParam().first;
- int else_subg = GetParam().second;
-
- // When If operation's subgraph index is invalid
-
- CircleGen cgen;
-
- // constant buffers
- std::vector<float> then_data{-100};
- uint32_t then_buf = cgen.addBuffer(then_data);
- std::vector<float> else_data{100};
- uint32_t else_buf = cgen.addBuffer(else_data);
-
- // primary subgraph
- {
- int x = cgen.addTensor({{1}, circle::TensorType_BOOL});
- int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- cgen.addOperatorIf({{x}, {ret}}, then_subg, else_subg);
-
- cgen.setInputsAndOutputs({x}, {ret});
- }
-
- // then subgraph
- {
- cgen.nextSubgraph();
- int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, then_buf});
- cgen.setInputsAndOutputs({}, {ret});
- }
-
- // else subgraph
- {
- cgen.nextSubgraph();
- int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, else_buf});
- cgen.setInputsAndOutputs({}, {ret});
- }
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-INSTANTIATE_TEST_CASE_P(GenModelTest, IfWrongSubgraphIndex,
- ::testing::Values(std::make_pair(99, 2), std::make_pair(-1, 2),
- std::make_pair(1, 99), std::make_pair(1, -99),
- std::make_pair(-99, 99)));
diff --git a/tests/nnfw_api/src/one_op_tests/If.test.cc b/tests/nnfw_api/src/one_op_tests/If.test.cc
new file mode 100644
index 000000000..543d87980
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/If.test.cc
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_If)
+{
+ // The model looks just like the below pseudocode
+ //
+ // function model(x)
+ // {
+ // if (x < 0.0)
+ // return -100.0;
+ // else
+ // return 100.0;
+ // }
+
+ CircleGen cgen;
+
+ // constant buffers
+ std::vector<float> comp_data{0.0};
+ uint32_t comp_buf = cgen.addBuffer(comp_data);
+ std::vector<float> then_data{-100};
+ uint32_t then_buf = cgen.addBuffer(then_data);
+ std::vector<float> else_data{100};
+ uint32_t else_buf = cgen.addBuffer(else_data);
+
+ // primary subgraph
+ {
+ int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int comp = cgen.addTensor({{1}, circle::TensorType_FLOAT32, comp_buf});
+ int cond = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ cgen.addOperatorLess({{x, comp}, {cond}});
+
+ int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorIf({{cond}, {ret}}, 1, 2);
+
+ cgen.setInputsAndOutputs({x}, {ret});
+ }
+
+ // then subgraph
+ {
+ cgen.nextSubgraph();
+ int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, then_buf});
+ cgen.setInputsAndOutputs({}, {ret});
+ }
+
+ // else subgraph
+ {
+ cgen.nextSubgraph();
+ int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, else_buf});
+ cgen.setInputsAndOutputs({}, {ret});
+ }
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{-1.0}}, {{-100.0}}));
+ _context->addTestCase(uniformTCD<float>({{1.0}}, {{100.0}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+class IfWrongSubgraphIndex : public GenModelTest,
+ public ::testing::WithParamInterface<std::pair<int, int>>
+{
+};
+
+TEST_P(IfWrongSubgraphIndex, neg_Test)
+{
+ // These values must be less than 0 or greater than 2
+ int then_subg = GetParam().first;
+ int else_subg = GetParam().second;
+
+ // When If operation's subgraph index is invalid
+
+ CircleGen cgen;
+
+ // constant buffers
+ std::vector<float> then_data{-100};
+ uint32_t then_buf = cgen.addBuffer(then_data);
+ std::vector<float> else_data{100};
+ uint32_t else_buf = cgen.addBuffer(else_data);
+
+ // primary subgraph
+ {
+ int x = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorIf({{x}, {ret}}, then_subg, else_subg);
+
+ cgen.setInputsAndOutputs({x}, {ret});
+ }
+
+ // then subgraph
+ {
+ cgen.nextSubgraph();
+ int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, then_buf});
+ cgen.setInputsAndOutputs({}, {ret});
+ }
+
+ // else subgraph
+ {
+ cgen.nextSubgraph();
+ int ret = cgen.addTensor({{1}, circle::TensorType_FLOAT32, else_buf});
+ cgen.setInputsAndOutputs({}, {ret});
+ }
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+INSTANTIATE_TEST_SUITE_P(GenModelTest, IfWrongSubgraphIndex,
+ ::testing::Values(std::make_pair(99, 2), std::make_pair(-1, 2),
+ std::make_pair(1, 99), std::make_pair(1, -99),
+ std::make_pair(-99, 99)));
diff --git a/tests/nnfw_api/src/one_op_tests/InstanceNorm.cc b/tests/nnfw_api/src/one_op_tests/InstanceNorm.test.cc
index 6569ced21..6569ced21 100644
--- a/tests/nnfw_api/src/one_op_tests/InstanceNorm.cc
+++ b/tests/nnfw_api/src/one_op_tests/InstanceNorm.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/L2Normalization.cc b/tests/nnfw_api/src/one_op_tests/L2Normalization.test.cc
index f825fec5c..f825fec5c 100644
--- a/tests/nnfw_api/src/one_op_tests/L2Normalization.cc
+++ b/tests/nnfw_api/src/one_op_tests/L2Normalization.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/LeakyRelu.cc b/tests/nnfw_api/src/one_op_tests/LeakyRelu.test.cc
index cb3af4ee2..cb3af4ee2 100644
--- a/tests/nnfw_api/src/one_op_tests/LeakyRelu.cc
+++ b/tests/nnfw_api/src/one_op_tests/LeakyRelu.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Less.test.cc b/tests/nnfw_api/src/one_op_tests/Less.test.cc
new file mode 100644
index 000000000..6f76465ae
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Less.test.cc
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct LessVariationParam
+{
+ TestCaseData tcd;
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ const std::vector<std::string> backends = {"acl_cl", "acl_neon", "cpu"};
+};
+
+class LessVariation : public GenModelTest, public ::testing::WithParamInterface<LessVariationParam>
+{
+};
+
+// Input shape:
+// Base: {1, 2, 2, 1}
+// Broadcast: {1} on one of the two inputs
+// Output shape: {1, 2, 2, 1}
+// Input type: Non-quantization type
+// Output type: BOOL
+// Test with different input type and value
+INSTANTIATE_TEST_SUITE_P(GenModelTest, LessVariation,
+ ::testing::Values(
+ // Float type
+ LessVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.1, 0.2, 0.3, 0.4})
+ .addOutput<bool>({false, false, true, false})},
+ // Float type - broadcast
+ LessVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.3})
+ .addOutput<bool>({true, false, true, false})},
+ // Int32 type
+ LessVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({1, 2, 3, 4})
+ .addOutput<bool>({false, false, true, false}),
+ circle::TensorType::TensorType_INT32},
+ // Int32 type - broadcast
+ LessVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({5})
+ .addOutput<bool>({true, true, true, false}),
+ circle::TensorType::TensorType_INT32},
+ // Int64 type
+ // NYI: acl backend
+ LessVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1, 2, 3, 4})
+ .addOutput<bool>({false, false, true, false}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}},
+ // Int64 type - broadcast
+ // NYI: acl backend
+ LessVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1})
+ .addOutput<bool>({false, false, true, false}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}}));
+
+TEST_P(LessVariation, Test)
+{
+ auto &param = GetParam();
+
+ auto lhs_data = param.tcd.inputs.at(0);
+ auto rhs_data = param.tcd.inputs.at(1);
+
+ bool broadcast_lhs = false;
+ bool broadcast_rhs = false;
+ if (lhs_data.size() != rhs_data.size())
+ {
+ if (lhs_data.size() < rhs_data.size())
+ broadcast_lhs = true;
+ else
+ broadcast_rhs = true;
+ }
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_BOOL;
+
+ int lhs = broadcast_lhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int rhs = broadcast_rhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int out = cgen.addTensor({{1, 2, 2, 1}, output_type});
+ cgen.addOperatorLess({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends(param.backends);
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Less_DifferentType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ cgen.addOperatorLess({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Less_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorLess({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/LessEqual.test.cc b/tests/nnfw_api/src/one_op_tests/LessEqual.test.cc
new file mode 100644
index 000000000..e0e6d6698
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/LessEqual.test.cc
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct LessEqualVariationParam
+{
+ TestCaseData tcd;
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ const std::vector<std::string> backends = {"acl_cl", "acl_neon", "cpu"};
+};
+
+class LessEqualVariation : public GenModelTest,
+ public ::testing::WithParamInterface<LessEqualVariationParam>
+{
+};
+
+// Input shape:
+// Base: {1, 2, 2, 1}
+// Broadcast: {1} on one of the two inputs
+// Output shape: {1, 2, 2, 1}
+// Input type: Non-quantization type
+// Output type: BOOL
+// Test with different input type and value
+INSTANTIATE_TEST_SUITE_P(GenModelTest, LessEqualVariation,
+ ::testing::Values(
+ // Float type
+ LessEqualVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.1, 0.2, 0.3, 0.4})
+ .addOutput<bool>({true, false, true, false})},
+ // Float type - broadcast
+ LessEqualVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.2, 0.7})
+ .addInput<float>({0.3})
+ .addOutput<bool>({true, true, true, false})},
+ // Int32 type
+ LessEqualVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({1, 2, 3, 4})
+ .addOutput<bool>({true, false, true, false}),
+ circle::TensorType::TensorType_INT32},
+ // Int32 type - broadcast
+ LessEqualVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 2, 7})
+ .addInput<int32_t>({5})
+ .addOutput<bool>({true, true, true, false}),
+ circle::TensorType::TensorType_INT32},
+ // Int64 type
+ // NYI: acl backend
+ LessEqualVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1, 2, 3, 4})
+ .addOutput<bool>({true, false, true, false}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}},
+ // Int64 type - broadcast
+ // NYI: acl backend
+ LessEqualVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, -2, 7})
+ .addInput<int64_t>({1})
+ .addOutput<bool>({true, false, true, false}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}}));
+
+TEST_P(LessEqualVariation, Test)
+{
+ auto &param = GetParam();
+
+ auto lhs_data = param.tcd.inputs.at(0);
+ auto rhs_data = param.tcd.inputs.at(1);
+
+ bool broadcast_lhs = false;
+ bool broadcast_rhs = false;
+ if (lhs_data.size() != rhs_data.size())
+ {
+ if (lhs_data.size() < rhs_data.size())
+ broadcast_lhs = true;
+ else
+ broadcast_rhs = true;
+ }
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_BOOL;
+
+ int lhs = broadcast_lhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int rhs = broadcast_rhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int out = cgen.addTensor({{1, 2, 2, 1}, output_type});
+ cgen.addOperatorLessEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends(param.backends);
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_LessEqual_DifferentType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ cgen.addOperatorLessEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_LessEqual_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorLessEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/LogSoftmax.cc b/tests/nnfw_api/src/one_op_tests/LogSoftmax.test.cc
index 5834fa53a..5834fa53a 100644
--- a/tests/nnfw_api/src/one_op_tests/LogSoftmax.cc
+++ b/tests/nnfw_api/src/one_op_tests/LogSoftmax.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Mean.cc b/tests/nnfw_api/src/one_op_tests/Mean.test.cc
index 6293d3837..6293d3837 100644
--- a/tests/nnfw_api/src/one_op_tests/Mean.cc
+++ b/tests/nnfw_api/src/one_op_tests/Mean.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Mul.cc b/tests/nnfw_api/src/one_op_tests/Mul.test.cc
index 0c7944613..0c7944613 100644
--- a/tests/nnfw_api/src/one_op_tests/Mul.cc
+++ b/tests/nnfw_api/src/one_op_tests/Mul.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Neg.cc b/tests/nnfw_api/src/one_op_tests/Neg.test.cc
index 7bc0cc452..7bc0cc452 100644
--- a/tests/nnfw_api/src/one_op_tests/Neg.cc
+++ b/tests/nnfw_api/src/one_op_tests/Neg.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/NotEqual.test.cc b/tests/nnfw_api/src/one_op_tests/NotEqual.test.cc
new file mode 100644
index 000000000..6a3fec150
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/NotEqual.test.cc
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct NotEqualVariationParam
+{
+ TestCaseData tcd;
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ const std::vector<std::string> backends = {"acl_cl", "acl_neon", "cpu"};
+};
+
+class NotEqualVariation : public GenModelTest,
+ public ::testing::WithParamInterface<NotEqualVariationParam>
+{
+};
+
+// Input shape:
+// Base: {1, 2, 2, 1}
+// Broadcast: {1} on one of the two inputs
+// Output shape: {1, 2, 2, 1}
+// Input type: Non-quantization type
+// Output type: BOOL
+// Test with different input type and value
+INSTANTIATE_TEST_SUITE_P(GenModelTest, NotEqualVariation,
+ ::testing::Values(
+ // Float type
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.5, 0.7})
+ .addInput<float>({0.1, 0.2, 0.3, 0.4})
+ .addOutput<bool>({false, true, true, true})},
+ // Float type - broadcast
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<float>({0.1, 0.3, 0.5, 0.7})
+ .addInput<float>({0.3})
+ .addOutput<bool>({true, false, true, true})},
+ // Int32 type
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 5, 7})
+ .addInput<int32_t>({1, 2, 3, 4})
+ .addOutput<bool>({false, true, true, true}),
+ circle::TensorType::TensorType_INT32},
+ // Int32 type - broadcast
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<int32_t>({1, 3, 5, 7})
+ .addInput<int32_t>({5})
+ .addOutput<bool>({true, true, false, true}),
+ circle::TensorType::TensorType_INT32},
+ // Int64 type
+ // NYI: acl backend
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, 5, 7})
+ .addInput<int64_t>({1, 2, 3, 4})
+ .addOutput<bool>({false, true, true, true}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}},
+ // Int64 type - broadcast
+ // NYI: acl backend
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<int64_t>({1, 3, 5, 7})
+ .addInput<int64_t>({1})
+ .addOutput<bool>({false, true, true, true}),
+ circle::TensorType::TensorType_INT64,
+ {"cpu"}},
+ // Bool type
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<bool>({false, false, true, true})
+ .addInput<bool>({false, true, false, true})
+ .addOutput<bool>({false, true, true, false}),
+ circle::TensorType::TensorType_BOOL},
+ // Bool type - broadcast
+ NotEqualVariationParam{TestCaseData{}
+ .addInput<bool>({false, false, true, true})
+ .addInput<bool>({false})
+ .addOutput<bool>({false, false, true, true}),
+ circle::TensorType::TensorType_BOOL}
+
+ ));
+
+TEST_P(NotEqualVariation, Test)
+{
+ auto &param = GetParam();
+
+ auto lhs_data = param.tcd.inputs.at(0);
+ auto rhs_data = param.tcd.inputs.at(1);
+
+ bool broadcast_lhs = false;
+ bool broadcast_rhs = false;
+ if (lhs_data.size() != rhs_data.size())
+ {
+ if (lhs_data.size() < rhs_data.size())
+ broadcast_lhs = true;
+ else
+ broadcast_rhs = true;
+ }
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_BOOL;
+
+ int lhs = broadcast_lhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int rhs = broadcast_rhs ? cgen.addTensor({{1}, param.input_type})
+ : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+ int out = cgen.addTensor({{1, 2, 2, 1}, output_type});
+ cgen.addOperatorNotEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends(param.backends);
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_NotEqual_DifferentType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+ cgen.addOperatorNotEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_NotEqual_InvalidType)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_INT32});
+ cgen.addOperatorNotEqual({{lhs, rhs}, {out}});
+ cgen.setInputsAndOutputs({lhs, rhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/OneHot.cc b/tests/nnfw_api/src/one_op_tests/OneHot.test.cc
index 78ad35b40..78ad35b40 100644
--- a/tests/nnfw_api/src/one_op_tests/OneHot.cc
+++ b/tests/nnfw_api/src/one_op_tests/OneHot.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Pad.cc b/tests/nnfw_api/src/one_op_tests/Pad.cc
deleted file mode 100644
index c376c1c02..000000000
--- a/tests/nnfw_api/src/one_op_tests/Pad.cc
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-// Input shape: {1, 2, 2, 1}
-// Padding: {0, 0, 1, 1, 1, 1, 0, 0}
-// Output shape: {1, 4, 4, 1}
-struct PadParam
-{
- TestCaseData tcd;
- circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
- float scale = 0.0f;
- int64_t zero_point = 0;
-};
-
-class PadVariation : public GenModelTest, public ::testing::WithParamInterface<PadParam>
-{
-};
-
-// Test with different value type
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, PadVariation,
- ::testing::Values(
- // float value
- PadParam{uniformTCD<float>({{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}})},
- // uint8 value
- PadParam{
- uniformTCD<uint8_t>({{1, 2, 3, 4}}, {{8, 8, 8, 8, 8, 1, 2, 8, 8, 3, 4, 8, 8, 8, 8, 8}}),
- circle::TensorType::TensorType_UINT8, 1.0, 8},
- // int8 value
- PadParam{uniformTCD<int8_t>({{-2, -1, 1, 2}},
- {{-5, -5, -5, -5, -5, -2, -1, -5, -5, 1, 2, -5, -5, -5, -5, -5}}),
- circle::TensorType::TensorType_INT8, 1.0, -5}));
-
-TEST_P(PadVariation, Test)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
- std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
- uint32_t padding_buf = cgen.addBuffer(padding_data);
- int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
- int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
-
- cgen.addOperatorPad({{in, padding}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(param.tcd);
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-TEST_P(PadVariation, neg_InvalidPadRank)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
- std::vector<int32_t> padding_data{1, 1, 1, 1};
- uint32_t padding_buf = cgen.addBuffer(padding_data);
- int padding = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, padding_buf});
- int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
-
- cgen.addOperatorPad({{in, padding}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-TEST_P(PadVariation, neg_InvalidPadDim0)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
- std::vector<int32_t> padding_data{1, 1, 1, 1};
- uint32_t padding_buf = cgen.addBuffer(padding_data);
- int padding = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32, padding_buf});
- int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
-
- cgen.addOperatorPad({{in, padding}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-TEST_P(PadVariation, neg_InvalidPadDim1)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
- std::vector<int32_t> padding_data{1, 1, 1, 1};
- uint32_t padding_buf = cgen.addBuffer(padding_data);
- int padding = cgen.addTensor({{4, 1}, circle::TensorType::TensorType_INT32, padding_buf});
- int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
-
- cgen.addOperatorPad({{in, padding}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-TEST_P(PadVariation, neg_Type)
-{
- auto &param = GetParam();
-
- const circle::TensorType output_type = ((param.data_type == circle::TensorType::TensorType_UINT8)
- ? circle::TensorType::TensorType_INT8
- : circle::TensorType::TensorType_UINT8);
-
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
- std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
- uint32_t padding_buf = cgen.addBuffer(padding_data);
- int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
- int out = cgen.addTensor({{1, 4, 4, 1}, output_type}, 1.0, 0);
-
- cgen.addOperatorPad({{in, padding}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Pad_QuantParam)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 1);
- std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
- uint32_t padding_buf = cgen.addBuffer(padding_data);
- int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
- int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 3);
-
- cgen.addOperatorPad({{in, padding}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/Pad.test.cc b/tests/nnfw_api/src/one_op_tests/Pad.test.cc
new file mode 100644
index 000000000..582bd84bc
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Pad.test.cc
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+// Input shape: {1, 2, 2, 1}
+// Padding: {0, 0, 1, 1, 1, 1, 0, 0}
+// Output shape: {1, 4, 4, 1}
+struct PadParam
+{
+ TestCaseData tcd;
+ circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+};
+
+class PadVariation : public GenModelTest, public ::testing::WithParamInterface<PadParam>
+{
+};
+
+// Test with different value type
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, PadVariation,
+ ::testing::Values(
+ // float value
+ PadParam{uniformTCD<float>({{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}})},
+ // uint8 value
+ PadParam{
+ uniformTCD<uint8_t>({{1, 2, 3, 4}}, {{8, 8, 8, 8, 8, 1, 2, 8, 8, 3, 4, 8, 8, 8, 8, 8}}),
+ circle::TensorType::TensorType_UINT8, 1.0, 8},
+ // int8 value
+ PadParam{uniformTCD<int8_t>({{-2, -1, 1, 2}},
+ {{-5, -5, -5, -5, -5, -2, -1, -5, -5, 1, 2, -5, -5, -5, -5, -5}}),
+ circle::TensorType::TensorType_INT8, 1.0, -5}));
+
+TEST_P(PadVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
+ std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+TEST_P(PadVariation, neg_InvalidPadRank)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_P(PadVariation, neg_InvalidPadDim0)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_P(PadVariation, neg_InvalidPadDim1)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
+ std::vector<int32_t> padding_data{1, 1, 1, 1};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 1}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_P(PadVariation, neg_Type)
+{
+ auto &param = GetParam();
+
+ const circle::TensorType output_type = ((param.data_type == circle::TensorType::TensorType_UINT8)
+ ? circle::TensorType::TensorType_INT8
+ : circle::TensorType::TensorType_UINT8);
+
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
+ std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, output_type}, 1.0, 0);
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Pad_QuantParam)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 1);
+ std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
+ uint32_t padding_buf = cgen.addBuffer(padding_data);
+ int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
+ int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 3);
+
+ cgen.addOperatorPad({{in, padding}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/PadV2.cc b/tests/nnfw_api/src/one_op_tests/PadV2.test.cc
index 3db2187b2..3db2187b2 100644
--- a/tests/nnfw_api/src/one_op_tests/PadV2.cc
+++ b/tests/nnfw_api/src/one_op_tests/PadV2.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Quantize.cc b/tests/nnfw_api/src/one_op_tests/Quantize.test.cc
index 5ab4d6297..5ab4d6297 100644
--- a/tests/nnfw_api/src/one_op_tests/Quantize.cc
+++ b/tests/nnfw_api/src/one_op_tests/Quantize.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Rank.cc b/tests/nnfw_api/src/one_op_tests/Rank.test.cc
index 60ec1931a..60ec1931a 100644
--- a/tests/nnfw_api/src/one_op_tests/Rank.cc
+++ b/tests/nnfw_api/src/one_op_tests/Rank.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Reduce.cc b/tests/nnfw_api/src/one_op_tests/Reduce.cc
deleted file mode 100644
index bdcc5c225..000000000
--- a/tests/nnfw_api/src/one_op_tests/Reduce.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-#include <memory>
-
-CircleBuffer genSimpleReduceModel(circle::BuiltinOperator op, bool keep_dims)
-{
- CircleGen cgen;
- uint32_t axis_buf = cgen.addBuffer(std::vector<int32_t>{0, 1, 2, 3});
- int in = cgen.addTensor({{2, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32});
- int axis = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, axis_buf});
- int out = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorReduce({{in, axis}, {out}}, op, keep_dims);
- cgen.setInputsAndOutputs({in}, {out});
- return cgen.finish();
-}
-
-TEST_F(GenModelTest, OneOp_ReduceMax)
-{
- auto model = genSimpleReduceModel(circle::BuiltinOperator_REDUCE_MAX, false);
- _context = std::make_unique<GenModelTestContext>(std::move(model));
- _context->addTestCase(uniformTCD<float>({{1, 2, 3, 4, 5, 6}}, {{6}}));
- _context->addTestCase(uniformTCD<float>({{100, 98, 55, 200, 3, 40}}, {{200}}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-class ReduceMaxBadIndex : public GenModelTest,
- public ::testing::WithParamInterface<std::vector<int>>
-{
-};
-
-TEST_P(ReduceMaxBadIndex, neg_Test)
-{
- CircleGen cgen;
- // Axis cannot be equal or bigger than input's rank - 4
- uint32_t axis_buf = cgen.addBuffer(GetParam());
- int in = cgen.addTensor({{2, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32});
- int axis = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, axis_buf});
- int out = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorReduce({{in, axis}, {out}}, circle::BuiltinOperator_REDUCE_MAX, false);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-INSTANTIATE_TEST_CASE_P(GenModelTest, ReduceMaxBadIndex,
- ::testing::Values(std::vector<int32_t>{0, 1, 2, 4},
- std::vector<int32_t>{0, -5, 2, 3},
- std::vector<int32_t>{-88, 1, 2, 3},
- std::vector<int32_t>{0, 1, 88, 3}));
diff --git a/tests/nnfw_api/src/one_op_tests/Reduce.test.cc b/tests/nnfw_api/src/one_op_tests/Reduce.test.cc
new file mode 100644
index 000000000..13d180aed
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Reduce.test.cc
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+CircleBuffer genSimpleReduceModel(circle::BuiltinOperator op, bool keep_dims)
+{
+ CircleGen cgen;
+ uint32_t axis_buf = cgen.addBuffer(std::vector<int32_t>{0, 1, 2, 3});
+ int in = cgen.addTensor({{2, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32});
+ int axis = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, axis_buf});
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorReduce({{in, axis}, {out}}, op, keep_dims);
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen.finish();
+}
+
+TEST_F(GenModelTest, OneOp_ReduceMax)
+{
+ auto model = genSimpleReduceModel(circle::BuiltinOperator_REDUCE_MAX, false);
+ _context = std::make_unique<GenModelTestContext>(std::move(model));
+ _context->addTestCase(uniformTCD<float>({{1, 2, 3, 4, 5, 6}}, {{6}}));
+ _context->addTestCase(uniformTCD<float>({{100, 98, 55, 200, 3, 40}}, {{200}}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+class ReduceMaxBadIndex : public GenModelTest,
+ public ::testing::WithParamInterface<std::vector<int>>
+{
+};
+
+TEST_P(ReduceMaxBadIndex, neg_Test)
+{
+ CircleGen cgen;
+ // Axis cannot be equal or bigger than input's rank - 4
+ uint32_t axis_buf = cgen.addBuffer(GetParam());
+ int in = cgen.addTensor({{2, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32});
+ int axis = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, axis_buf});
+ int out = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorReduce({{in, axis}, {out}}, circle::BuiltinOperator_REDUCE_MAX, false);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+INSTANTIATE_TEST_SUITE_P(GenModelTest, ReduceMaxBadIndex,
+ ::testing::Values(std::vector<int32_t>{0, 1, 2, 4},
+ std::vector<int32_t>{0, -5, 2, 3},
+ std::vector<int32_t>{-88, 1, 2, 3},
+ std::vector<int32_t>{0, 1, 88, 3}));
diff --git a/tests/nnfw_api/src/one_op_tests/Relu.cc b/tests/nnfw_api/src/one_op_tests/Relu.test.cc
index 28c511270..28c511270 100644
--- a/tests/nnfw_api/src/one_op_tests/Relu.cc
+++ b/tests/nnfw_api/src/one_op_tests/Relu.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Relu6.cc b/tests/nnfw_api/src/one_op_tests/Relu6.test.cc
index 88b8eba83..88b8eba83 100644
--- a/tests/nnfw_api/src/one_op_tests/Relu6.cc
+++ b/tests/nnfw_api/src/one_op_tests/Relu6.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/ResizeBilinear.cc b/tests/nnfw_api/src/one_op_tests/ResizeBilinear.cc
deleted file mode 100644
index 5db08f168..000000000
--- a/tests/nnfw_api/src/one_op_tests/ResizeBilinear.cc
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-#include <memory>
-
-struct ResizeBilinearParam
-{
- TestCaseData tcd;
- circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
- float scale = 0.0f;
- int64_t zero_point = 0;
-};
-
-class ResizeBilinearVariation : public GenModelTest,
- public ::testing::WithParamInterface<ResizeBilinearParam>
-{
-};
-
-TEST_P(ResizeBilinearVariation, Test)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- std::vector<int32_t> size_data{3, 3};
- uint32_t size_buf = cgen.addBuffer(size_data);
- int size = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, size_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
- int out = cgen.addTensor({{1, 3, 3, 1}, param.data_type}, param.scale, param.zero_point);
- cgen.addOperatorResizeBilinear({{in, size}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(param.tcd);
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, ResizeBilinearVariation,
- ::testing::Values(
- // float value
- ResizeBilinearParam{uniformTCD<float>({{1, 1, 2, 2}}, {{1, 1, 1, 1.666666667, 1.666666667,
- 1.666666667, 2, 2, 2}})},
- // uint8 value
- ResizeBilinearParam{uniformTCD<uint8_t>({{3, 6, 9, 12}}, {{3, 5, 6, 7, 9, 10, 9, 11, 12}}),
- circle::TensorType::TensorType_UINT8, 1.0, 0},
- // int8 value
- ResizeBilinearParam{uniformTCD<int8_t>({{-6, -3, 9, 12}}, {{-6, -4, -3, 4, 6, 7, 9, 11, 12}}),
- circle::TensorType::TensorType_INT8, 1.0, 0}));
-
-TEST_F(GenModelTest, OneOp_ResizeBilinear_SizeToVar)
-{
- CircleGen cgen;
- int size = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorResizeBilinear({{in, size}, {out}});
- cgen.setInputsAndOutputs({in, size}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- // FIXME enable a test case the below is not a valid test case
- //_context->addTestCase(TestCaseData{}.addInput<int32_t>({3, 3}).addInput<float>({1, 1, 2,
- // 2}).addOutput<float>({1, 1, 1, 1.666666667, 1.666666667, 1.666666667, 2, 2, 2}));
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_ResizeBilinear_InvalidSizeVal)
-{
- CircleGen cgen;
- std::vector<int32_t> size_data{-3, 3};
- uint32_t size_buf = cgen.addBuffer(size_data);
- int size = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, size_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorResizeBilinear({{in, size}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"cpu"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/ResizeBilinear.test.cc b/tests/nnfw_api/src/one_op_tests/ResizeBilinear.test.cc
new file mode 100644
index 000000000..fe313d4e7
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/ResizeBilinear.test.cc
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+struct ResizeBilinearParam
+{
+ TestCaseData tcd;
+ circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+};
+
+class ResizeBilinearVariation : public GenModelTest,
+ public ::testing::WithParamInterface<ResizeBilinearParam>
+{
+};
+
+TEST_P(ResizeBilinearVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ std::vector<int32_t> size_data{3, 3};
+ uint32_t size_buf = cgen.addBuffer(size_data);
+ int size = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, size_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 3, 3, 1}, param.data_type}, param.scale, param.zero_point);
+ cgen.addOperatorResizeBilinear({{in, size}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, ResizeBilinearVariation,
+ ::testing::Values(
+ // float value
+ ResizeBilinearParam{uniformTCD<float>({{1, 1, 2, 2}}, {{1, 1, 1, 1.666666667, 1.666666667,
+ 1.666666667, 2, 2, 2}})},
+ // uint8 value
+ ResizeBilinearParam{uniformTCD<uint8_t>({{3, 6, 9, 12}}, {{3, 5, 6, 7, 9, 10, 9, 11, 12}}),
+ circle::TensorType::TensorType_UINT8, 1.0, 0},
+ // int8 value
+ ResizeBilinearParam{uniformTCD<int8_t>({{-6, -3, 9, 12}}, {{-6, -4, -3, 4, 6, 7, 9, 11, 12}}),
+ circle::TensorType::TensorType_INT8, 1.0, 0}));
+
+TEST_F(GenModelTest, OneOp_ResizeBilinear_SizeToVar)
+{
+ CircleGen cgen;
+ int size = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorResizeBilinear({{in, size}, {out}});
+ cgen.setInputsAndOutputs({in, size}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ // FIXME enable a test case the below is not a valid test case
+ //_context->addTestCase(TestCaseData{}.addInput<int32_t>({3, 3}).addInput<float>({1, 1, 2,
+ // 2}).addOutput<float>({1, 1, 1, 1.666666667, 1.666666667, 1.666666667, 2, 2, 2}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_ResizeBilinear_InvalidSizeVal)
+{
+ CircleGen cgen;
+ std::vector<int32_t> size_data{-3, 3};
+ uint32_t size_buf = cgen.addBuffer(size_data);
+ int size = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, size_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorResizeBilinear({{in, size}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.cc b/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.test.cc
index 1dd65844b..1dd65844b 100644
--- a/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.cc
+++ b/tests/nnfw_api/src/one_op_tests/ResizeNearestNeighbor.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Reverse.cc b/tests/nnfw_api/src/one_op_tests/Reverse.test.cc
index 4168b2123..4168b2123 100644
--- a/tests/nnfw_api/src/one_op_tests/Reverse.cc
+++ b/tests/nnfw_api/src/one_op_tests/Reverse.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Select.cc b/tests/nnfw_api/src/one_op_tests/Select.test.cc
index e1d991877..e1d991877 100644
--- a/tests/nnfw_api/src/one_op_tests/Select.cc
+++ b/tests/nnfw_api/src/one_op_tests/Select.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Shape.cc b/tests/nnfw_api/src/one_op_tests/Shape.test.cc
index 2a73db99a..2a73db99a 100644
--- a/tests/nnfw_api/src/one_op_tests/Shape.cc
+++ b/tests/nnfw_api/src/one_op_tests/Shape.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Slice.cc b/tests/nnfw_api/src/one_op_tests/Slice.cc
deleted file mode 100644
index 002fb0132..000000000
--- a/tests/nnfw_api/src/one_op_tests/Slice.cc
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-struct SliceVariationParam
-{
- std::vector<int32_t> input_shape;
- std::vector<int32_t> begins;
- std::vector<int32_t> sizes;
- TestCaseData tcd;
-
- circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
- float scale = 0.0f;
- int64_t zero_point = 0;
- circle::TensorType begins_type = circle::TensorType::TensorType_INT32;
-};
-
-class SliceVariation : public GenModelTest,
- public ::testing::WithParamInterface<SliceVariationParam>
-{
-};
-
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, SliceVariation,
- ::testing::Values(
- SliceVariationParam{
- {2, 2, 3, 1},
- {0, 1, 1, 0},
- {1, 1, 2, 1},
- uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}})},
- SliceVariationParam{
- {2, 2, 3, 1},
- {0, 1, 1, 0},
- {1, 1, 2, 1},
- uniformTCD<uint8_t>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}),
- circle::TensorType::TensorType_UINT8,
- 1,
- 0},
- SliceVariationParam{
- {2, 2, 3, 1},
- {0, 1, 1, 0},
- {1, 1, 2, 1},
- uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}),
- circle::TensorType::TensorType_FLOAT32,
- 0,
- 0,
- circle::TensorType::TensorType_INT64}));
-
-TEST_P(SliceVariation, Test)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
-
- int in = cgen.addTensor({param.input_shape, param.input_type}, param.scale, param.zero_point);
- int out = cgen.addTensor({param.sizes, param.input_type}, param.scale, param.zero_point);
- if (param.begins_type == circle::TensorType::TensorType_INT32)
- {
- uint32_t begins_buf = cgen.addBuffer(param.begins);
- int rank = param.begins.size();
- int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
-
- uint32_t sizes_buf = cgen.addBuffer(param.sizes);
- int sizes = cgen.addTensor({{rank}, param.begins_type, sizes_buf});
-
- cgen.addOperatorSlice({{in, begins, sizes}, {out}});
- }
- else if (param.begins_type == circle::TensorType::TensorType_INT64)
- {
- std::vector<int64_t> begins_64(param.begins.size());
- std::vector<int64_t> sizes_64(param.sizes.size());
- for (int i = 0; i < param.begins.size(); i++)
- {
- begins_64[i] = param.begins[i];
- sizes_64[i] = param.sizes[i];
- }
-
- uint32_t begins_buf = cgen.addBuffer(begins_64);
- int rank = param.begins.size();
- int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
-
- uint32_t sizes_buf = cgen.addBuffer(sizes_64);
- int sizes = cgen.addTensor({{rank}, param.begins_type, sizes_buf});
-
- cgen.addOperatorSlice({{in, begins, sizes}, {out}});
- }
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(param.tcd);
-
- // acl don't support int64 yet
- if (param.begins_type == circle::TensorType::TensorType_INT64)
- {
- _context->setBackends({"cpu"});
- }
- else
- {
- _context->setBackends({"cpu", "acl_cl", "acl_neon"});
- }
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Slice_Type)
-{
- CircleGen cgen;
- int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
- std::vector<float> begins_data = {0, 0, 1, 0};
- uint32_t begins_buf = cgen.addBuffer(begins_data);
- int begins = cgen.addTensor({{4}, circle::TensorType::TensorType_FLOAT32, begins_buf});
- std::vector<float> sizes_data = {1, 2, 1, 1};
- uint32_t sizes_buf = cgen.addBuffer(sizes_data);
- int sizes = cgen.addTensor({{4}, circle::TensorType::TensorType_FLOAT32, sizes_buf});
- int out = cgen.addTensor({{1, 2, 1, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorSlice({{in, begins, sizes}, {out}});
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-TEST_P(SliceVariation, neg_DiffType)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
-
- int in = cgen.addTensor({param.input_shape, param.input_type}, param.scale, param.zero_point);
- int out = cgen.addTensor({param.sizes, param.input_type}, param.scale, param.zero_point);
- if (param.begins_type == circle::TensorType::TensorType_INT32)
- {
- uint32_t begins_buf = cgen.addBuffer(param.begins);
- std::vector<int64_t> sizes_64(param.sizes.size());
- for (int i = 0; i < param.begins.size(); i++)
- {
- sizes_64[i] = param.sizes[i];
- }
-
- int rank = param.begins.size();
- int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
-
- uint32_t sizes_buf = cgen.addBuffer(sizes_64);
- int sizes = cgen.addTensor({{rank}, circle::TensorType::TensorType_INT64, sizes_buf});
-
- cgen.addOperatorSlice({{in, begins, sizes}, {out}});
- }
- else if (param.begins_type == circle::TensorType::TensorType_INT64)
- {
- std::vector<int64_t> begins_64(param.begins.size());
- for (int i = 0; i < param.begins.size(); i++)
- {
- begins_64[i] = param.begins[i];
- }
-
- uint32_t begins_buf = cgen.addBuffer(begins_64);
- int rank = param.begins.size();
- int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
-
- uint32_t sizes_buf = cgen.addBuffer(param.sizes);
- int sizes = cgen.addTensor({{rank}, circle::TensorType::TensorType_INT32, sizes_buf});
-
- cgen.addOperatorSlice({{in, begins, sizes}, {out}});
- }
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/Slice.test.cc b/tests/nnfw_api/src/one_op_tests/Slice.test.cc
new file mode 100644
index 000000000..8cd9d7037
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Slice.test.cc
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+struct SliceVariationParam
+{
+ std::vector<int32_t> input_shape;
+ std::vector<int32_t> begins;
+ std::vector<int32_t> sizes;
+ TestCaseData tcd;
+
+ circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+ float scale = 0.0f;
+ int64_t zero_point = 0;
+ circle::TensorType begins_type = circle::TensorType::TensorType_INT32;
+};
+
+class SliceVariation : public GenModelTest,
+ public ::testing::WithParamInterface<SliceVariationParam>
+{
+};
+
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, SliceVariation,
+ ::testing::Values(
+ SliceVariationParam{
+ {2, 2, 3, 1},
+ {0, 1, 1, 0},
+ {1, 1, 2, 1},
+ uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}})},
+ SliceVariationParam{
+ {2, 2, 3, 1},
+ {0, 1, 1, 0},
+ {1, 1, 2, 1},
+ uniformTCD<uint8_t>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}),
+ circle::TensorType::TensorType_UINT8,
+ 1,
+ 0},
+ SliceVariationParam{
+ {2, 2, 3, 1},
+ {0, 1, 1, 0},
+ {1, 1, 2, 1},
+ uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}),
+ circle::TensorType::TensorType_FLOAT32,
+ 0,
+ 0,
+ circle::TensorType::TensorType_INT64}));
+
+TEST_P(SliceVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({param.sizes, param.input_type}, param.scale, param.zero_point);
+ if (param.begins_type == circle::TensorType::TensorType_INT32)
+ {
+ uint32_t begins_buf = cgen.addBuffer(param.begins);
+ int rank = param.begins.size();
+ int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
+
+ uint32_t sizes_buf = cgen.addBuffer(param.sizes);
+ int sizes = cgen.addTensor({{rank}, param.begins_type, sizes_buf});
+
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ }
+ else if (param.begins_type == circle::TensorType::TensorType_INT64)
+ {
+ std::vector<int64_t> begins_64(param.begins.size());
+ std::vector<int64_t> sizes_64(param.sizes.size());
+ for (int i = 0; i < param.begins.size(); i++)
+ {
+ begins_64[i] = param.begins[i];
+ sizes_64[i] = param.sizes[i];
+ }
+
+ uint32_t begins_buf = cgen.addBuffer(begins_64);
+ int rank = param.begins.size();
+ int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
+
+ uint32_t sizes_buf = cgen.addBuffer(sizes_64);
+ int sizes = cgen.addTensor({{rank}, param.begins_type, sizes_buf});
+
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ }
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+
+ // acl don't support int64 yet
+ if (param.begins_type == circle::TensorType::TensorType_INT64)
+ {
+ _context->setBackends({"cpu"});
+ }
+ else
+ {
+ _context->setBackends({"cpu", "acl_cl", "acl_neon"});
+ }
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Slice_Type)
+{
+ CircleGen cgen;
+ int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
+ std::vector<float> begins_data = {0, 0, 1, 0};
+ uint32_t begins_buf = cgen.addBuffer(begins_data);
+ int begins = cgen.addTensor({{4}, circle::TensorType::TensorType_FLOAT32, begins_buf});
+ std::vector<float> sizes_data = {1, 2, 1, 1};
+ uint32_t sizes_buf = cgen.addBuffer(sizes_data);
+ int sizes = cgen.addTensor({{4}, circle::TensorType::TensorType_FLOAT32, sizes_buf});
+ int out = cgen.addTensor({{1, 2, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+TEST_P(SliceVariation, neg_DiffType)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({param.sizes, param.input_type}, param.scale, param.zero_point);
+ if (param.begins_type == circle::TensorType::TensorType_INT32)
+ {
+ uint32_t begins_buf = cgen.addBuffer(param.begins);
+ std::vector<int64_t> sizes_64(param.sizes.size());
+ for (int i = 0; i < param.begins.size(); i++)
+ {
+ sizes_64[i] = param.sizes[i];
+ }
+
+ int rank = param.begins.size();
+ int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
+
+ uint32_t sizes_buf = cgen.addBuffer(sizes_64);
+ int sizes = cgen.addTensor({{rank}, circle::TensorType::TensorType_INT64, sizes_buf});
+
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ }
+ else if (param.begins_type == circle::TensorType::TensorType_INT64)
+ {
+ std::vector<int64_t> begins_64(param.begins.size());
+ for (int i = 0; i < param.begins.size(); i++)
+ {
+ begins_64[i] = param.begins[i];
+ }
+
+ uint32_t begins_buf = cgen.addBuffer(begins_64);
+ int rank = param.begins.size();
+ int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
+
+ uint32_t sizes_buf = cgen.addBuffer(param.sizes);
+ int sizes = cgen.addTensor({{rank}, circle::TensorType::TensorType_INT32, sizes_buf});
+
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ }
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Softmax.cc b/tests/nnfw_api/src/one_op_tests/Softmax.cc
deleted file mode 100644
index aba4e89a0..000000000
--- a/tests/nnfw_api/src/one_op_tests/Softmax.cc
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-
-// beta = 0.1
-// input/output shape: {1, 2, 1, 4}
-struct SoftmaxParam
-{
- TestCaseData tcd;
- circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
- float input_scale = 0.0f;
- int64_t input_zero_point = 0;
-};
-
-class SoftmaxVariation : public GenModelTest, public ::testing::WithParamInterface<SoftmaxParam>
-{
-};
-
-// Test with different value type
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, SoftmaxVariation,
- ::testing::Values(
- // float value
- SoftmaxParam{
- uniformTCD<float>({{0, -6, 2, 4, 3, -2, 10, 1}},
- {{.23463, .12877, .28658, .35003, .22528, .13664, .45365, .18443}})},
- // uint8 value
- SoftmaxParam{
- uniformTCD<uint8_t>({{10, 4, 12, 14, 13, 8, 20, 11}}, {{60, 33, 73, 90, 58, 35, 116, 47}}),
- circle::TensorType::TensorType_UINT8, 1.0, 10},
- // int8 value
- SoftmaxParam{
- uniformTCD<int8_t>({{0, -6, 2, 4, 3, -2, 10, 1}}, {{-68, -95, -55, -38, -70, -93, -12, -81}}),
- circle::TensorType::TensorType_INT8, 1.0, 0}));
-
-TEST_P(SoftmaxVariation, Test)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
-
- // NNAPI spec and tflite test use fixed output scale and zero-point
- float out_scale = 0.0;
- int64_t out_zero_point = 0;
- if (param.data_type == circle::TensorType::TensorType_UINT8)
- {
- out_scale = 1.0f / 256;
- }
- else if (param.data_type == circle::TensorType::TensorType_INT8)
- {
- out_scale = 1.0f / 256;
- out_zero_point = -128;
- }
-
- int input =
- cgen.addTensor({{1, 2, 1, 4}, param.data_type}, param.input_scale, param.input_zero_point);
- int out = cgen.addTensor({{1, 2, 1, 4}, param.data_type}, out_scale, out_zero_point);
- cgen.addOperatorSoftmax({{input}, {out}}, 0.1);
- cgen.setInputsAndOutputs({input}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(param.tcd);
- _context->setBackends({"cpu", "acl_neon", "acl_cl"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, neg_OneOp_Softmax_Invaild_Beta)
-{
- CircleGen cgen;
- int input = cgen.addTensor({{4, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{4, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorSoftmax({{input}, {out}}, 0.1);
- cgen.setInputsAndOutputs({input}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{-1., 0., 1., 1.}}, {{-1., -1., -1., -1.}}));
- _context->setBackends({"gpu_cl"});
- _context->expectFailCompile();
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_Softmax)
-{
- CircleGen cgen;
- int lhs = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorSoftmax({{lhs}, {out}}, 1.0);
- cgen.setInputsAndOutputs({lhs}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>(
- {{-1., 0., 1., 1.}},
- {{0.054064586758613586, 0.14696279168128967, 0.39948627352714539, 0.39948627352714539}}));
- _context->setBackends({"acl_cl", "cpu", "gpu_cl"});
-
- SUCCEED();
-}
-
-TEST_P(SoftmaxVariation, neg_Type)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- int input =
- cgen.addTensor({{1, 2, 1, 4}, param.data_type}, param.input_scale, param.input_zero_point);
- int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_BOOL});
- cgen.addOperatorSoftmax({{input}, {out}}, 0.1);
- cgen.setInputsAndOutputs({input}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/Softmax.test.cc b/tests/nnfw_api/src/one_op_tests/Softmax.test.cc
new file mode 100644
index 000000000..1782baf64
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/Softmax.test.cc
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+// beta = 0.1
+// input/output shape: {1, 2, 1, 4}
+struct SoftmaxParam
+{
+ TestCaseData tcd;
+ circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
+ float input_scale = 0.0f;
+ int64_t input_zero_point = 0;
+};
+
+class SoftmaxVariation : public GenModelTest, public ::testing::WithParamInterface<SoftmaxParam>
+{
+};
+
+// Test with different value type
+INSTANTIATE_TEST_SUITE_P(
+ GenModelTest, SoftmaxVariation,
+ ::testing::Values(
+ // float value
+ SoftmaxParam{
+ uniformTCD<float>({{0, -6, 2, 4, 3, -2, 10, 1}},
+ {{.23463, .12877, .28658, .35003, .22528, .13664, .45365, .18443}})},
+ // uint8 value
+ SoftmaxParam{
+ uniformTCD<uint8_t>({{10, 4, 12, 14, 13, 8, 20, 11}}, {{60, 33, 73, 90, 58, 35, 116, 47}}),
+ circle::TensorType::TensorType_UINT8, 1.0, 10},
+ // int8 value
+ SoftmaxParam{
+ uniformTCD<int8_t>({{0, -6, 2, 4, 3, -2, 10, 1}}, {{-68, -95, -55, -38, -70, -93, -12, -81}}),
+ circle::TensorType::TensorType_INT8, 1.0, 0}));
+
+TEST_P(SoftmaxVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+
+ // NNAPI spec and tflite test use fixed output scale and zero-point
+ float out_scale = 0.0;
+ int64_t out_zero_point = 0;
+ if (param.data_type == circle::TensorType::TensorType_UINT8)
+ {
+ out_scale = 1.0f / 256;
+ }
+ else if (param.data_type == circle::TensorType::TensorType_INT8)
+ {
+ out_scale = 1.0f / 256;
+ out_zero_point = -128;
+ }
+
+ int input =
+ cgen.addTensor({{1, 2, 1, 4}, param.data_type}, param.input_scale, param.input_zero_point);
+ int out = cgen.addTensor({{1, 2, 1, 4}, param.data_type}, out_scale, out_zero_point);
+ cgen.addOperatorSoftmax({{input}, {out}}, 0.1);
+ cgen.setInputsAndOutputs({input}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"cpu", "acl_neon", "acl_cl"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_Softmax_Invaild_Beta)
+{
+ CircleGen cgen;
+ int input = cgen.addTensor({{4, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{4, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSoftmax({{input}, {out}}, 0.1);
+ cgen.setInputsAndOutputs({input}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{-1., 0., 1., 1.}}, {{-1., -1., -1., -1.}}));
+ _context->setBackends({"gpu_cl"});
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_Softmax)
+{
+ CircleGen cgen;
+ int lhs = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+ cgen.addOperatorSoftmax({{lhs}, {out}}, 1.0);
+ cgen.setInputsAndOutputs({lhs}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>(
+ {{-1., 0., 1., 1.}},
+ {{0.054064586758613586, 0.14696279168128967, 0.39948627352714539, 0.39948627352714539}}));
+ _context->setBackends({"acl_cl", "cpu", "gpu_cl"});
+
+ SUCCEED();
+}
+
+TEST_P(SoftmaxVariation, neg_Type)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int input =
+ cgen.addTensor({{1, 2, 1, 4}, param.data_type}, param.input_scale, param.input_zero_point);
+ int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_BOOL});
+ cgen.addOperatorSoftmax({{input}, {out}}, 0.1);
+ cgen.setInputsAndOutputs({input}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Split.cc b/tests/nnfw_api/src/one_op_tests/Split.test.cc
index 32be9a767..32be9a767 100644
--- a/tests/nnfw_api/src/one_op_tests/Split.cc
+++ b/tests/nnfw_api/src/one_op_tests/Split.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Sqrt.cc b/tests/nnfw_api/src/one_op_tests/Sqrt.test.cc
index 01f313371..01f313371 100644
--- a/tests/nnfw_api/src/one_op_tests/Sqrt.cc
+++ b/tests/nnfw_api/src/one_op_tests/Sqrt.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Square.cc b/tests/nnfw_api/src/one_op_tests/Square.test.cc
index 2ec9bad0d..2ec9bad0d 100644
--- a/tests/nnfw_api/src/one_op_tests/Square.cc
+++ b/tests/nnfw_api/src/one_op_tests/Square.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/StridedSlice.cc b/tests/nnfw_api/src/one_op_tests/StridedSlice.test.cc
index fb29018d4..fb29018d4 100644
--- a/tests/nnfw_api/src/one_op_tests/StridedSlice.cc
+++ b/tests/nnfw_api/src/one_op_tests/StridedSlice.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Sub.cc b/tests/nnfw_api/src/one_op_tests/Sub.test.cc
index bb4fecd2d..bb4fecd2d 100644
--- a/tests/nnfw_api/src/one_op_tests/Sub.cc
+++ b/tests/nnfw_api/src/one_op_tests/Sub.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Tile.cc b/tests/nnfw_api/src/one_op_tests/Tile.test.cc
index 3f193d5e6..3f193d5e6 100644
--- a/tests/nnfw_api/src/one_op_tests/Tile.cc
+++ b/tests/nnfw_api/src/one_op_tests/Tile.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/Transpose.cc b/tests/nnfw_api/src/one_op_tests/Transpose.test.cc
index 5a92c7303..5a92c7303 100644
--- a/tests/nnfw_api/src/one_op_tests/Transpose.cc
+++ b/tests/nnfw_api/src/one_op_tests/Transpose.test.cc
diff --git a/tests/nnfw_api/src/one_op_tests/While.cc b/tests/nnfw_api/src/one_op_tests/While.cc
deleted file mode 100644
index ee0a9df46..000000000
--- a/tests/nnfw_api/src/one_op_tests/While.cc
+++ /dev/null
@@ -1,270 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GenModelTest.h"
-#include "WhileTestModel.h"
-
-#include <memory>
-
-TEST_F(GenModelTest, OneOp_While)
-{
- WhileModelLoop10 model;
- _context = std::make_unique<GenModelTestContext>(std::move(model.cbuf));
- _context->addTestCase(uniformTCD<float>({{0}}, {{100}}));
- _context->addTestCase(uniformTCD<float>({{2}}, {{102}}));
- _context->addTestCase(uniformTCD<float>({{22}}, {{102}}));
- _context->addTestCase(uniformTCD<float>({{100}}, {{100}}));
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_While_github_4783)
-{
- // The model looks just like the below pseudocode
- //
- // function model(x, data)
- // {
- // // `data` does not do anything but passed to while's cond and body subgraphs
- // // to measure copy overhead between subgraphs
- // while (x < 100.0)
- // {
- // x = x + 1.0;
- // }
- // return (x, data)
- // }
-
- const int kElems = 4;
- const std::vector<int32_t> shape{kElems};
-
- CircleGen cgen;
- uint32_t incr_buf = cgen.addBuffer(std::vector<float>{1});
- uint32_t incr_data_buf = cgen.addBuffer(std::vector<float>(kElems, 1));
- uint32_t end_buf = cgen.addBuffer(std::vector<float>{100});
-
- // primary subgraph
- {
- int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int d_in = cgen.addTensor({shape, circle::TensorType_FLOAT32});
- int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int d_out = cgen.addTensor({shape, circle::TensorType_FLOAT32});
- cgen.addOperatorWhile({{x_in, d_in}, {x_out, d_out}}, 1, 2);
- cgen.setInputsAndOutputs({x_in, d_in}, {x_out, d_out});
- }
-
- // cond subgraph
- {
- cgen.nextSubgraph();
- int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int d = cgen.addTensor({shape, circle::TensorType_FLOAT32});
- int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf});
- int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
- cgen.addOperatorLess({{x, end}, {result}});
- cgen.setInputsAndOutputs({x, d}, {result});
- }
-
- // body subgraph
- {
- cgen.nextSubgraph();
- int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
- int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int d_in = cgen.addTensor({shape, circle::TensorType_FLOAT32});
- int incr_d = cgen.addTensor({shape, circle::TensorType_FLOAT32, incr_data_buf});
- int d_out = cgen.addTensor({shape, circle::TensorType_FLOAT32});
- cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
- cgen.addOperatorAdd({{d_in, incr_d}, {d_out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({x_in, d_in}, {x_out, d_out});
- }
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- std::vector<float> tc_data_in(kElems, 9);
- std::vector<float> tc_data_out(kElems, 109);
- _context->addTestCase(uniformTCD<float>({{0}, tc_data_in}, {{100}, tc_data_out}));
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-TEST_F(GenModelTest, OneOp_While_TwoInputs)
-{
- // The model looks just like the below pseudocode
- //
- // function model(x, end)
- // {
- // while (x < end)
- // {
- // x = x + 10.0
- // }
- // return x
- // }
-
- CircleGen cgen;
- std::vector<float> incr_data{10};
- uint32_t incr_buf = cgen.addBuffer(incr_data);
-
- // primary subgraph
- {
- int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int end_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int end_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- cgen.addOperatorWhile({{x_in, end_in}, {x_out, end_out}}, 1, 2);
- cgen.setInputsAndOutputs({x_in, end_in}, {x_out});
- }
-
- // cond subgraph
- {
- cgen.nextSubgraph();
- int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
- cgen.addOperatorLess({{x, end}, {result}});
- cgen.setInputsAndOutputs({x, end}, {result});
- }
-
- // body subgraph
- {
- cgen.nextSubgraph();
- int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
- int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({x_in, end}, {x_out, end});
- }
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(uniformTCD<float>({{0}, {20}}, {{20}}));
- _context->addTestCase(uniformTCD<float>({{5}, {30}}, {{35}}));
- _context->addTestCase(uniformTCD<float>({{20}, {10}}, {{20}}));
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
-
-class WhileWrongSubgraphIndex : public GenModelTest,
- public ::testing::WithParamInterface<std::pair<int, int>>
-{
-};
-
-TEST_P(WhileWrongSubgraphIndex, neg_Test)
-{
- // These values must be less than 0 or greater than 2
- int cond_subg = GetParam().first;
- int body_subg = GetParam().second;
-
- // When While operation's subgraph index is invalid
-
- CircleGen cgen;
-
- // constant buffers
- std::vector<float> incr_data{10};
- uint32_t incr_buf = cgen.addBuffer(incr_data);
-
- // primary subgraph
- {
- int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int end_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int end_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- cgen.addOperatorWhile({{x_in, end_in}, {x_out, end_out}}, cond_subg, body_subg);
- cgen.setInputsAndOutputs({x_in, end_in}, {x_out});
- }
-
- // cond subgraph
- {
- cgen.nextSubgraph();
- int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
- cgen.addOperatorLess({{x, end}, {result}});
- cgen.setInputsAndOutputs({x, end}, {result});
- }
-
- // body subgraph
- {
- cgen.nextSubgraph();
- int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
- int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({x_in, end}, {x_out, end});
- }
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"cpu"});
- _context->expectFailModelLoad();
-
- SUCCEED();
-}
-
-INSTANTIATE_TEST_CASE_P(GenModelTest, WhileWrongSubgraphIndex,
- ::testing::Values(std::make_pair(99, 2), std::make_pair(-1, 2),
- std::make_pair(1, 99), std::make_pair(1, -99),
- std::make_pair(-99, 99)));
-
-// In this test, output of WHILE and body subgraph have different data types
-TEST_F(GenModelTest, neg_while_wrong_dtype)
-{
- CircleGen cgen;
- std::vector<float> incr_data{10};
- uint32_t incr_buf = cgen.addBuffer(incr_data);
- std::vector<float> end_data{100};
- uint32_t end_buf = cgen.addBuffer(end_data);
-
- // primary subgraph
- {
- int model_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int model_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
-
- cgen.addOperatorWhile({{model_in}, {model_out}}, 1, 2);
- cgen.setInputsAndOutputs({model_in}, {model_out});
- }
-
- // cond subgraph
- {
- cgen.nextSubgraph();
- int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf});
- int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
- cgen.addOperatorLess({{x, end}, {result}});
- cgen.setInputsAndOutputs({x}, {result});
- }
-
- // body subgraph
- {
- cgen.nextSubgraph();
- int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
- int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
- int cast_out = cgen.addTensor({{1}, circle::TensorType_INT32});
- cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
- cgen.addOperatorCast({{x_out}, {cast_out}}, circle::TensorType_FLOAT32,
- circle::TensorType_INT32);
- cgen.setInputsAndOutputs({x_in}, {cast_out});
- // output of this subgraph is INT32 but output of WHILE is FLOAT32
- }
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- auto tc = uniformTCD<float>({{0}}, {{100}});
- tc.expectFailRun();
- _context->addTestCase(tc);
- _context->setBackends({"cpu"});
-
- SUCCEED();
-}
diff --git a/tests/nnfw_api/src/one_op_tests/While.test.cc b/tests/nnfw_api/src/one_op_tests/While.test.cc
new file mode 100644
index 000000000..5c4da552c
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/While.test.cc
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+#include "WhileTestModel.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_While)
+{
+ WhileModelLoop10 model;
+ _context = std::make_unique<GenModelTestContext>(std::move(model.cbuf));
+ _context->addTestCase(uniformTCD<float>({{0}}, {{100}}));
+ _context->addTestCase(uniformTCD<float>({{2}}, {{102}}));
+ _context->addTestCase(uniformTCD<float>({{22}}, {{102}}));
+ _context->addTestCase(uniformTCD<float>({{100}}, {{100}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_While_github_4783)
+{
+ // The model looks just like the below pseudocode
+ //
+ // function model(x, data)
+ // {
+ // // `data` does not do anything but passed to while's cond and body subgraphs
+ // // to measure copy overhead between subgraphs
+ // while (x < 100.0)
+ // {
+ // x = x + 1.0;
+ // }
+ // return (x, data)
+ // }
+
+ const int kElems = 4;
+ const std::vector<int32_t> shape{kElems};
+
+ CircleGen cgen;
+ uint32_t incr_buf = cgen.addBuffer(std::vector<float>{1});
+ uint32_t incr_data_buf = cgen.addBuffer(std::vector<float>(kElems, 1));
+ uint32_t end_buf = cgen.addBuffer(std::vector<float>{100});
+
+ // primary subgraph
+ {
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int d_in = cgen.addTensor({shape, circle::TensorType_FLOAT32});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int d_out = cgen.addTensor({shape, circle::TensorType_FLOAT32});
+ cgen.addOperatorWhile({{x_in, d_in}, {x_out, d_out}}, 1, 2);
+ cgen.setInputsAndOutputs({x_in, d_in}, {x_out, d_out});
+ }
+
+ // cond subgraph
+ {
+ cgen.nextSubgraph();
+ int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int d = cgen.addTensor({shape, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf});
+ int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ cgen.addOperatorLess({{x, end}, {result}});
+ cgen.setInputsAndOutputs({x, d}, {result});
+ }
+
+ // body subgraph
+ {
+ cgen.nextSubgraph();
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int d_in = cgen.addTensor({shape, circle::TensorType_FLOAT32});
+ int incr_d = cgen.addTensor({shape, circle::TensorType_FLOAT32, incr_data_buf});
+ int d_out = cgen.addTensor({shape, circle::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
+ cgen.addOperatorAdd({{d_in, incr_d}, {d_out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({x_in, d_in}, {x_out, d_out});
+ }
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ std::vector<float> tc_data_in(kElems, 9);
+ std::vector<float> tc_data_out(kElems, 109);
+ _context->addTestCase(uniformTCD<float>({{0}, tc_data_in}, {{100}, tc_data_out}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_While_TwoInputs)
+{
+ // The model looks just like the below pseudocode
+ //
+ // function model(x, end)
+ // {
+ // while (x < end)
+ // {
+ // x = x + 10.0
+ // }
+ // return x
+ // }
+
+ CircleGen cgen;
+ std::vector<float> incr_data{10};
+ uint32_t incr_buf = cgen.addBuffer(incr_data);
+
+ // primary subgraph
+ {
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorWhile({{x_in, end_in}, {x_out, end_out}}, 1, 2);
+ cgen.setInputsAndOutputs({x_in, end_in}, {x_out});
+ }
+
+ // cond subgraph
+ {
+ cgen.nextSubgraph();
+ int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ cgen.addOperatorLess({{x, end}, {result}});
+ cgen.setInputsAndOutputs({x, end}, {result});
+ }
+
+ // body subgraph
+ {
+ cgen.nextSubgraph();
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({x_in, end}, {x_out, end});
+ }
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{0}, {20}}, {{20}}));
+ _context->addTestCase(uniformTCD<float>({{5}, {30}}, {{35}}));
+ _context->addTestCase(uniformTCD<float>({{20}, {10}}, {{20}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+class WhileWrongSubgraphIndex : public GenModelTest,
+ public ::testing::WithParamInterface<std::pair<int, int>>
+{
+};
+
+TEST_P(WhileWrongSubgraphIndex, neg_Test)
+{
+ // These values must be less than 0 or greater than 2
+ int cond_subg = GetParam().first;
+ int body_subg = GetParam().second;
+
+ // When While operation's subgraph index is invalid
+
+ CircleGen cgen;
+
+ // constant buffers
+ std::vector<float> incr_data{10};
+ uint32_t incr_buf = cgen.addBuffer(incr_data);
+
+ // primary subgraph
+ {
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorWhile({{x_in, end_in}, {x_out, end_out}}, cond_subg, body_subg);
+ cgen.setInputsAndOutputs({x_in, end_in}, {x_out});
+ }
+
+ // cond subgraph
+ {
+ cgen.nextSubgraph();
+ int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ cgen.addOperatorLess({{x, end}, {result}});
+ cgen.setInputsAndOutputs({x, end}, {result});
+ }
+
+ // body subgraph
+ {
+ cgen.nextSubgraph();
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({x_in, end}, {x_out, end});
+ }
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
+
+INSTANTIATE_TEST_SUITE_P(GenModelTest, WhileWrongSubgraphIndex,
+ ::testing::Values(std::make_pair(99, 2), std::make_pair(-1, 2),
+ std::make_pair(1, 99), std::make_pair(1, -99),
+ std::make_pair(-99, 99)));
+
+// In this test, output of WHILE and body subgraph have different data types
+TEST_F(GenModelTest, neg_while_wrong_dtype)
+{
+ CircleGen cgen;
+ std::vector<float> incr_data{10};
+ uint32_t incr_buf = cgen.addBuffer(incr_data);
+ std::vector<float> end_data{100};
+ uint32_t end_buf = cgen.addBuffer(end_data);
+
+ // primary subgraph
+ {
+ int model_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int model_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+
+ cgen.addOperatorWhile({{model_in}, {model_out}}, 1, 2);
+ cgen.setInputsAndOutputs({model_in}, {model_out});
+ }
+
+ // cond subgraph
+ {
+ cgen.nextSubgraph();
+ int x = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int end = cgen.addTensor({{1}, circle::TensorType_FLOAT32, end_buf});
+ int result = cgen.addTensor({{1}, circle::TensorType_BOOL});
+ cgen.addOperatorLess({{x, end}, {result}});
+ cgen.setInputsAndOutputs({x}, {result});
+ }
+
+ // body subgraph
+ {
+ cgen.nextSubgraph();
+ int x_in = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int incr = cgen.addTensor({{1}, circle::TensorType_FLOAT32, incr_buf});
+ int x_out = cgen.addTensor({{1}, circle::TensorType_FLOAT32});
+ int cast_out = cgen.addTensor({{1}, circle::TensorType_INT32});
+ cgen.addOperatorAdd({{x_in, incr}, {x_out}}, circle::ActivationFunctionType_NONE);
+ cgen.addOperatorCast({{x_out}, {cast_out}}, circle::TensorType_FLOAT32,
+ circle::TensorType_INT32);
+ cgen.setInputsAndOutputs({x_in}, {cast_out});
+ // output of this subgraph is INT32 but output of WHILE is FLOAT32
+ }
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->setBackends({"cpu"});
+ // It is correct to call `_context->expectFailModelLoad();`, but OperationValidator does not deal
+ // with subgraphs. So it is verified by `_context->expectFailCompile(); as a workaround`
+ _context->expectFailCompile();
+
+ SUCCEED();
+}
diff --git a/tests/scripts/command/nnpkg-test b/tests/scripts/command/nnpkg-test
index a1176d153..ba712175e 100644
--- a/tests/scripts/command/nnpkg-test
+++ b/tests/scripts/command/nnpkg-test
@@ -12,6 +12,7 @@ outdir="."
nnpkg_run=${nnpkg_run:-"nnpackage_run"}
difftool=${difftool:-"h5diff"}
delete_dumped_on_failure=0
+verbose_diff=0
usage() {
echo "Usage: $0 $progname [options] nnpackage_test"
@@ -27,6 +28,7 @@ usage() {
echo " -o set output directory (default=$outdir)"
echo " -d delete dumped file on failure."
echo " (dumped file are always deleted on success) (default=$delete_dumped_on_failure)"
+ echo " -v verbose result diff (default=$verbose_diff)"
echo ""
echo "Environment variables:"
echo " nnpackage_run path to nnpackage_run (default=nnpackage_run)"
@@ -43,12 +45,13 @@ if [ $# -eq 0 ]; then
exit 1
fi
-while getopts "hdi:o:" OPTION; do
+while getopts "hdi:o:v" OPTION; do
case "${OPTION}" in
h) usage;;
d) delete_dumped_on_failure=1;;
i) indir=$OPTARG;;
o) outdir=$OPTARG;;
+ v) verbose_diff=1;;
?) exit 1;;
esac
done
@@ -110,8 +113,8 @@ echo -n "[Compare] $nnpkg "
test_fail()
{
echo -e "\tFail"
- [ $delete_dumped_on_failure ] && rm "$dumped"
- cat "$dumped.log"
+ [ $delete_dumped_on_failure -eq 1 ] && rm "$dumped"
+ [ $verbose_diff -eq 1 ] && cat "$dumped.log"
rm "$dumped.log"
exit 3
}
@@ -119,7 +122,7 @@ test_fail()
test_pass()
{
echo -e "\tPass"
- cat "$dumped.log"
+ [ $verbose_diff -eq 1 ] && cat "$dumped.log"
rm "$dumped" "$dumped.log"
}
diff --git a/tests/scripts/command/prepare-model b/tests/scripts/command/prepare-model
index 5b3340813..7c6525491 100644
--- a/tests/scripts/command/prepare-model
+++ b/tests/scripts/command/prepare-model
@@ -24,8 +24,8 @@ function Usage()
echo "Usage: $0 $(basename ${BASH_SOURCE[0]}) [OPTIONS]"
echo ""
echo "Options:"
- echo " --ignoremd5 Ignore MD5 check when download model files"
- echo " --model=(all|nnpackage|tflite) Download test model (deprecated option: always all)"
+ echo " --ignoremd5 Ignore MD5 check when download model files"
+ echo " -h, --help Display this help message and exit"
}
for i in "$@"
@@ -38,9 +38,6 @@ do
--ignoremd5)
MD5_CHECK="off"
;;
- --model=*)
- # deprecated
- ;;
*)
echo "Unknown option: $i"
exit 1
@@ -49,9 +46,10 @@ do
shift
done
-# Default download server url
+# Check MODELFILE_SERVER
if [[ -z "$MODELFILE_SERVER" ]]; then
- export MODELFILE_SERVER="http://npu.mooo.com/archive/tflite_test_model/"
+ echo "Fail to download models: Please set MODELFILE_SERVER to download model"
+ exit 1
fi
echo "Download from $MODELFILE_SERVER"
diff --git a/tests/tools/nnpackage_run/src/nnpackage_run.cc b/tests/tools/nnpackage_run/src/nnpackage_run.cc
index 71d8b5977..7a58053f3 100644
--- a/tests/tools/nnpackage_run/src/nnpackage_run.cc
+++ b/tests/tools/nnpackage_run/src/nnpackage_run.cc
@@ -29,6 +29,7 @@
#include "ruy/profiler/profiler.h"
#endif
+#include <boost/program_options.hpp>
#include <cassert>
#include <chrono>
#include <cstdlib>
@@ -313,6 +314,11 @@ int main(const int argc, char **argv)
return 0;
}
+ catch (boost::program_options::error &e)
+ {
+ std::cerr << "E: " << e.what() << std::endl;
+ exit(-1);
+ }
catch (std::runtime_error &e)
{
std::cerr << "E: Fail to run by runtime error:" << e.what() << std::endl;
diff --git a/tests/tools/nnpackage_run/src/rawformatter.cc b/tests/tools/nnpackage_run/src/rawformatter.cc
index f90018e56..e4b977485 100644
--- a/tests/tools/nnpackage_run/src/rawformatter.cc
+++ b/tests/tools/nnpackage_run/src/rawformatter.cc
@@ -29,14 +29,13 @@ void RawFormatter::loadInputs(const std::string &filename, std::vector<Allocatio
uint32_t num_inputs;
NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
- // TODO: Support multiple inputs
- // Option 1. Get comman-separated input file list like --load:raw in.0,in.1,in.2
- // Option 2. Get prefix --load:raw out
- // Internally access out.0, out.1, out.2, ... out.{N} where N is determined by api.
- if (num_inputs != 1)
- {
- throw std::runtime_error("Only 1 input is supported for raw input");
- }
+ // Support multiple inputs
+ // Option 1: Get comman-separated input file list like --load:raw a,b,c
+ // Option 2: Get prefix --load:raw in
+ // Internally access in.0, in.1, in.2, ... in.{N-1} where N is determined by nnfw info
+ // query api.
+ //
+ // Currently Option 2 is implemented.
try
{
for (uint32_t i = 0; i < num_inputs; ++i)
@@ -48,11 +47,12 @@ void RawFormatter::loadInputs(const std::string &filename, std::vector<Allocatio
auto bufsz = bufsize_for(&ti);
inputs[i].alloc(bufsz);
- std::ifstream file(filename, std::ios::ate | std::ios::binary);
+ std::ifstream file(filename + "." + std::to_string(i), std::ios::ate | std::ios::binary);
auto filesz = file.tellg();
if (bufsz != filesz)
{
- throw std::runtime_error("Input Size does not match: " + std::to_string(bufsz) +
+ throw std::runtime_error("Input " + std::to_string(i) +
+ " size does not match: " + std::to_string(bufsz) +
" expected, but " + std::to_string(filesz) + " provided.");
}
file.seekg(0, std::ios::beg);
@@ -74,12 +74,6 @@ void RawFormatter::dumpOutputs(const std::string &filename, std::vector<Allocati
{
uint32_t num_outputs;
NNPR_ENSURE_STATUS(nnfw_output_size(session_, &num_outputs));
- // TODO: Support multiple outputs
- // Available options are same.
- if (num_outputs != 1)
- {
- throw std::runtime_error("Only 1 output is supported for raw input");
- }
try
{
for (uint32_t i = 0; i < num_outputs; i++)
diff --git a/tests/tools/tflite_vanilla_run/CMakeLists.txt b/tests/tools/tflite_vanilla_run/CMakeLists.txt
index a673058a4..115b2f386 100644
--- a/tests/tools/tflite_vanilla_run/CMakeLists.txt
+++ b/tests/tools/tflite_vanilla_run/CMakeLists.txt
@@ -2,12 +2,13 @@ if(NOT BUILD_TFLITE_VANILLA_RUN)
return()
endif()
-if(NOT BUILD_TENSORFLOW_LITE_2_3_0)
- set(BUILD_TENSORFLOW_LITE_2_3_0 ON)
+if(NOT BUILD_TENSORFLOW_LITE_2_8_0)
+ set(BUILD_TENSORFLOW_LITE_2_8_0 ON)
+ set(BUILD_TENSORFLOWRUY ON)
endif()
-nnfw_find_package(TensorFlowLite EXACT 2.3.0 REQUIRED)
-nnfw_find_package(Boost REQUIRED)
+nnfw_find_package(TensorFlowLite EXACT 2.8.0 REQUIRED)
+nnfw_find_package(Boost REQUIRED program_options)
list(APPEND TFLITE_RUN_SRCS "src/tflite_vanilla_run.cc")
list(APPEND TFLITE_RUN_SRCS "src/args.cc")
@@ -16,7 +17,7 @@ add_executable(tflite_vanilla_run ${TFLITE_RUN_SRCS})
target_include_directories(tflite_vanilla_run PRIVATE src)
target_include_directories(tflite_vanilla_run PRIVATE ${Boost_INCLUDE_DIRS})
-target_link_libraries(tflite_vanilla_run tensorflow-lite-2.3.0 ${LIB_PTHREAD} dl)
+target_link_libraries(tflite_vanilla_run tensorflow-lite-2.8.0 ${LIB_PTHREAD} dl)
target_link_libraries(tflite_vanilla_run ${Boost_PROGRAM_OPTIONS_LIBRARY})
target_link_libraries(tflite_vanilla_run nnfw_lib_benchmark nnfw_lib_misc)
diff --git a/tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc b/tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc
index 77b5e7a37..6194b4505 100644
--- a/tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc
+++ b/tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc
@@ -16,6 +16,7 @@
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "args.h"
#include "tensor_view.h"
@@ -128,7 +129,11 @@ int main(const int argc, char **argv)
try
{
phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) {
- interpreter->UseNNAPI(use_nnapi);
+ if (use_nnapi)
+ {
+ // TFLite NNAPI is not worked yet
+ interpreter->ModifyGraphWithDelegate(tflite::NnApiDelegate());
+ }
interpreter->AllocateTensors();
});
}
diff --git a/tools/cross/arm/sources.list.jammy b/tools/cross/arm/sources.list.jammy
new file mode 100644
index 000000000..6bb045302
--- /dev/null
+++ b/tools/cross/arm/sources.list.jammy
@@ -0,0 +1,11 @@
+deb http://ports.ubuntu.com/ubuntu-ports/ jammy main restricted universe
+deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy main restricted universe
+
+deb http://ports.ubuntu.com/ubuntu-ports/ jammy-updates main restricted universe
+deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-updates main restricted universe
+
+deb http://ports.ubuntu.com/ubuntu-ports/ jammy-backports main restricted
+deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-backports main restricted
+
+deb http://ports.ubuntu.com/ubuntu-ports/ jammy-security main restricted universe multiverse
+deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-security main restricted universe multiverse
diff --git a/tools/cross/arm/sources.list.xenial b/tools/cross/arm/sources.list.xenial
deleted file mode 100644
index 56fbb36a5..000000000
--- a/tools/cross/arm/sources.list.xenial
+++ /dev/null
@@ -1,11 +0,0 @@
-deb http://ports.ubuntu.com/ubuntu-ports/ xenial main restricted universe
-deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial main restricted universe
-
-deb http://ports.ubuntu.com/ubuntu-ports/ xenial-updates main restricted universe
-deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial-updates main restricted universe
-
-deb http://ports.ubuntu.com/ubuntu-ports/ xenial-backports main restricted
-deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial-backports main restricted
-
-deb http://ports.ubuntu.com/ubuntu-ports/ xenial-security main restricted universe multiverse
-deb-src http://ports.ubuntu.com/ubuntu-ports/ xenial-security main restricted universe multiverse
diff --git a/tools/cross/install_rootfs.sh b/tools/cross/install_rootfs.sh
index f03d52371..16f3a5cad 100755
--- a/tools/cross/install_rootfs.sh
+++ b/tools/cross/install_rootfs.sh
@@ -3,7 +3,7 @@ usage()
{
echo "Usage: $0 [BuildArch] [LinuxCodeName] [--setproxy=IP] [--skipunmount]"
echo "BuildArch can be: arm(default), aarch64 and armel"
- echo "LinuxCodeName - optional, Code name for Linux, can be: xenial, bionic(default), focal"
+ echo "LinuxCodeName - optional, Code name for Linux, can be: bionic(default), focal, jammy"
echo " If BuildArch is armel, this can be tizen(default)"
echo "--setproxy=IP - optional, IP is the proxy server IP address or url with portnumber"
echo " default no proxy. Example: --setproxy=127.1.2.3:8080"
@@ -22,12 +22,15 @@ __SkipUnmount=0
__IsProxySet=0
__Apt=""
# base development support
+# install cmake to find cmake package configuration for target file system
__UbuntuPackages="build-essential"
+__UbuntuPackages+=" cmake"
# other development supports
__UbuntuPackages+=" ocl-icd-opencl-dev"
__UbuntuPackages+=" libhdf5-dev"
__UbuntuPackages+=" libboost-all-dev"
+__UbuntuPackages+=" libglib2.0-dev"
# symlinks fixer
__UbuntuPackages+=" symlinks"
@@ -67,15 +70,15 @@ for i in "$@" ; do
__UbuntuRepo=
__LinuxCodeName=
;;
- xenial)
- __LinuxCodeName=xenial
- ;;
bionic)
__LinuxCodeName=bionic
;;
focal)
__LinuxCodeName=focal
;;
+ jammy)
+ __LinuxCodeName=jammy
+ ;;
--setproxy*)
proxyip="${i#*=}"
__Apt="Acquire::http::proxy \"http://$proxyip/\";\n"
diff --git a/tools/nnpackage_tool/gen_golden/gen_golden.py b/tools/nnpackage_tool/gen_golden/gen_golden.py
index 79c86e6d7..d555419a6 100755
--- a/tools/nnpackage_tool/gen_golden/gen_golden.py
+++ b/tools/nnpackage_tool/gen_golden/gen_golden.py
@@ -96,7 +96,7 @@ if __name__ == '__main__':
np.random.randint(-127, 127, this_shape).astype(np.int8))
elif this_dtype == tf.float32:
input_values.append(
- np.random.random_sample(this_shape).astype(np.float32))
+ (10 * np.random.random_sample(this_shape) - 5).astype(np.float32))
elif this_dtype == tf.bool:
# generate random integer from [0, 2)
input_values.append(
@@ -142,7 +142,7 @@ if __name__ == '__main__':
np.random.randint(-127, 127, this_shape).astype(np.int8))
elif this_dtype == np.float32:
input_values.append(
- np.random.random_sample(this_shape).astype(np.float32))
+ (10 * np.random.random_sample(this_shape) - 5).astype(np.float32))
elif this_dtype == np.bool_:
# generate random integer from [0, 2)
input_values.append(
diff --git a/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh b/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh
index 9374af737..5c7c35b13 100755
--- a/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh
+++ b/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh
@@ -5,24 +5,33 @@ set -eu
progname=$(basename "${BASH_SOURCE[0]}")
outdir="."
name=""
-config=""
-config_src=""
+configs_src=()
+models_src=()
+configs_str=""
+models_str=""
+types_str=""
usage() {
- echo "Usage: $progname [options] modelfile"
+ echo "Usage: $progname [options]"
echo "Convert modelfile (tflite, circle or tvn) to nnpackage."
echo ""
echo "Options:"
echo " -h show this help"
echo " -o set nnpackage output directory (default=$outdir)"
- echo " -p set nnpackage output name (default=[modelfile name])"
- echo " -c provide configuration file"
+ echo " -p set nnpackage output name (default=[1st modelfile name])"
+ echo " -c provide configuration files"
+ echo " -m provide model files"
+ echo ""
+ echo " (Will be deprecated: if there is one remain parameter, that is model file)"
echo ""
echo "Examples:"
- echo " $progname add.tflite => create nnpackage 'add' in $outdir/"
- echo " $progname -o out add.tflite => create nnpackage 'add' in out/"
- echo " $progname -o out -p addpkg add.tflite => create nnpackage 'addpkg' in out/"
- echo " $progname -c add.cfg add.tflite => create nnpackage 'add' with add.cfg"
+ echo " $progname -m add.tflite => create nnpackage 'add' in $outdir/"
+ echo " $progname -o out -m add.tflite => create nnpackage 'add' in out/"
+ echo " $progname -o out -p addpkg -m add.tflite => create nnpackage 'addpkg' in out/"
+ echo " $progname -c add.cfg -m add.tflite => create nnpackage 'add' with add.cfg"
+ echo " $progname -o out -p addpkg -m a1.tflite a2.tflite => create nnpackage 'addpkg' with models a1.tflite and a2.tflite in out/"
+ echo ""
+ echo "(Will be deprecated: if there is one remain parameter, that is model file)"
exit 1
}
@@ -31,58 +40,116 @@ if [ $# -eq 0 ]; then
exit 1
fi
-while getopts "ho:p:c:" OPTION; do
-case "${OPTION}" in
+while getopts "ho:p:c:m:" OPTION; do
+ case "${OPTION}" in
h) usage;;
o) outdir=$OPTARG;;
p) name=$OPTARG;;
- c) config_src=$OPTARG;;
+ c)
+ configs_src=($OPTARG)
+ until [[ $OPTIND -gt $# ]] || [[ $(eval "echo \${$OPTIND}") =~ ^-.* ]] || [ -z $(eval "echo \${$OPTIND}") ]; do
+ if [[ $OPTIND -eq $# ]] && [[ ${#models_src[@]} -eq 0 ]]; then
+ # Backward compatibility (will be deprecated)
+ # The last remain parameter is model if there is no option "-m"
+ models_src=($(eval "echo \${$OPTIND}"))
+ else
+ configs_src+=($(eval "echo \${$OPTIND}"))
+ fi
+ OPTIND=$((OPTIND + 1))
+ done
+ ;;
+ m)
+ models_src=($OPTARG)
+ until [[ $OPTIND -gt $# ]] || [[ $(eval "echo \${$OPTIND}") =~ ^-.* ]] || [ -z $(eval "echo \${$OPTIND}") ]; do
+ models_src+=($(eval "echo \${$OPTIND}"))
+ OPTIND=$((OPTIND + 1))
+ done
+ ;;
?) exit 1;;
-esac
+ esac
done
shift $((OPTIND-1))
-if [ $# -ne 1 ]; then
- >&2 echo "error: wrong argument (no argument or too many arguments)."
- >&2 echo "For help, type $progname -h"
- exit 1
+# Backward compatibility (will be deprecated)
+# The last remain parameter is model if there is no option "-m"
+if [ $# -eq 1 ] && [ ${#models_src[@]} -eq 0 ]; then
+ models_src=($1)
+ shift 1
fi
-modelfile=$(basename "$1")
-
-if [[ "$modelfile" != *.* ]]; then
- >&2 echo "error: modelfile does not have extension."
- >&2 echo "Please provide extension so that $progname can identify what type of model you use."
+if [ $# -ne 0 ]; then
+ >&2 echo "error: wrong argument (too many arguments)."
+ >&2 echo "For help, type $progname -h"
exit 1
fi
-if [ ! -e $1 ]; then
- >&2 echo "error: "$1" does not exist."
+if [[ ${#configs_src[@]} -ne 0 ]] && [[ ${#configs_src[@]} -ne ${#models_src[@]} ]]; then
+ >&2 echo "error: when config files are provided, the number of config files must equal the number of model files"
+ >&2 echo "Please provide config file for each model file, or don't provide config file."
exit 1
fi
+delim=""
+for modelpath in ${models_src[@]}
+do
+ modelfile=$(basename "$modelpath")
+
+ if [[ "$modelfile" != *.* ]]; then
+ >&2 echo "error: modelfile does not have extension."
+ >&2 echo "Please provide extension so that $progname can identify what type of model you use."
+ exit 1
+ fi
+
+ if [ ! -e $modelpath ]; then
+ >&2 echo "error: "$modelpath" does not exist."
+ exit 1
+ fi
+
+ models_str="$models_str$delim\"$modelfile\""
+ types_str="$types_str$delim\"${modelfile##*.}\""
+ delim=", "
+done
+
+delim=""
+for configpath in ${configs_src[@]}
+do
+ configfile=$(basename "$configpath")
+
+ if [ ! -e $configpath ]; then
+ >&2 echo "error: "$configpath" does not exist."
+ exit 1
+ fi
+
+ configs_str="$configs_str$delim\"$configfile\""
+ delim=", "
+done
+
if [ -z "$name" ]; then
- name=${modelfile%.*}
+ first_modelfile=$(basename "${models_src[0]}")
+ name=${first_modelfile%.*}
fi
-extension=${modelfile##*.}
echo "$progname: Generating nnpackage "$name" in "$outdir""
mkdir -p "$outdir"/"$name"/metadata
-if [ -s "$config_src" ]; then
- config=$(basename "$config_src")
- cp "$config_src" "$outdir/$name/metadata/$config"
-fi
-
cat > "$outdir"/"$name"/metadata/MANIFEST <<-EOF
{
"major-version" : "1",
"minor-version" : "2",
"patch-version" : "0",
- "configs" : [ "$config" ],
- "models" : [ "$modelfile" ],
- "model-types" : [ "$extension" ]
+ "configs" : [ $configs_str ],
+ "models" : [ $models_str ],
+ "model-types" : [ $types_str ]
}
EOF
-cp "$1" "$outdir"/"$name"
+
+for modelpath in ${models_src[@]}
+do
+ cp "$modelpath" "$outdir"/"$name"
+done
+
+for configpath in ${configs_src[@]}
+do
+ cp "$configpath" "$outdir/$name/metadata"
+done