author    Chunseok Lee <chunseok.lee@samsung.com>    2021-10-19 11:32:46 +0900
committer Chunseok Lee <chunseok.lee@samsung.com>    2021-10-19 11:32:46 +0900
commit    33ae5d70a1ed85d215c1293ed63afbf3517b07d5 (patch)
tree      9f1ace0f4760a8f7903ef15e2e92f1d1401e4b1e
parent    f4cf19e579a19c5346ccb2aad55bfd251065e447 (diff)
download  nnfw-33ae5d70a1ed85d215c1293ed63afbf3517b07d5.tar.gz
          nnfw-33ae5d70a1ed85d215c1293ed63afbf3517b07d5.tar.bz2
          nnfw-33ae5d70a1ed85d215c1293ed63afbf3517b07d5.zip
-rw-r--r--.ahub/tcchecker-tca/config.yaml15
-rw-r--r--compiler/arser/include/arser/arser.h82
-rw-r--r--compiler/arser/tests/arser.test.cpp64
-rw-r--r--compiler/circle-opselector/CMakeLists.txt36
-rw-r--r--compiler/circle-opselector/README.md21
-rw-r--r--compiler/circle-opselector/driver/Driver.cpp274
-rw-r--r--compiler/circle-opselector/requires.cmake6
-rw-r--r--compiler/circle-opselector/src/Driver.test.cpp66
-rw-r--r--compiler/circle-opselector/src/Driver.test.h27
-rw-r--r--compiler/circle-opselector/src/ModuleIO.cpp72
-rw-r--r--compiler/circle-opselector/src/ModuleIO.h33
-rw-r--r--compiler/circle-opselector/src/ModuleIO.test.cpp26
-rw-r--r--compiler/circle-opselector/src/TestHelper.h52
-rw-r--r--compiler/circle-part-value-test/CMakeLists.txt2
-rw-r--r--compiler/circle-partitioner/README.md4
-rw-r--r--compiler/circle-quantizer/src/CircleQuantizer.cpp42
-rw-r--r--compiler/circle2circle-dredd-recipe-test/test.lst1
-rw-r--r--compiler/circle2circle/src/Circle2Circle.cpp32
-rw-r--r--compiler/circledump/CMakeLists.txt2
-rw-r--r--compiler/common-artifacts/CMakeLists.txt24
-rw-r--r--compiler/enco/frontend/tflite/CMakeLists.txt4
-rw-r--r--compiler/exo/CMakeLists.txt2
-rw-r--r--compiler/luci-interpreter/CMakeLists.txt8
-rw-r--r--compiler/luci-interpreter/README.md158
-rw-r--r--compiler/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h144
-rw-r--r--compiler/luci-interpreter/include/luci_interpreter/Interpreter.h7
-rw-r--r--compiler/luci-interpreter/include/luci_interpreter/MemoryManager.h37
-rw-r--r--compiler/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h34
-rw-r--r--compiler/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h45
-rw-r--r--compiler/luci-interpreter/include/luci_interpreter/TestMemoryManager.h47
-rw-r--r--compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h53
-rw-r--r--compiler/luci-interpreter/pal/linux/KernelsToBuild.lst68
-rw-r--r--compiler/luci-interpreter/pal/linux/PALArgMax.h33
-rw-r--r--compiler/luci-interpreter/pal/linux/PALBatchToSpaceND.h37
-rw-r--r--compiler/luci-interpreter/pal/linux/PALConv2d.h79
-rw-r--r--compiler/luci-interpreter/pal/linux/PALDepthToSpace.h35
-rw-r--r--compiler/luci-interpreter/pal/linux/PALElu.h31
-rw-r--r--compiler/luci-interpreter/pal/linux/PALL2Normalize.h34
-rw-r--r--compiler/luci-interpreter/pal/linux/PALL2Pool2D.h33
-rw-r--r--compiler/luci-interpreter/pal/linux/PALLeakyRelu.h32
-rw-r--r--compiler/luci-interpreter/pal/linux/PALLocalResponseNormalization.h34
-rw-r--r--compiler/luci-interpreter/pal/linux/PALLogSoftmax.h47
-rw-r--r--compiler/luci-interpreter/pal/linux/PALMul.h45
-rw-r--r--compiler/luci-interpreter/pal/linux/PALNeg.h32
-rw-r--r--compiler/luci-interpreter/pal/linux/PALRelu.h39
-rw-r--r--compiler/luci-interpreter/pal/linux/PALRelu6.h39
-rw-r--r--compiler/luci-interpreter/pal/linux/PALResizeBilinear.h37
-rw-r--r--compiler/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h37
-rw-r--r--compiler/luci-interpreter/pal/linux/PALSlice.h33
-rw-r--r--compiler/luci-interpreter/pal/linux/PALSoftmax.h47
-rw-r--r--compiler/luci-interpreter/pal/linux/PALSpaceToBatchND.h38
-rw-r--r--compiler/luci-interpreter/pal/linux/PALSpaceToDepth.h35
-rw-r--r--compiler/luci-interpreter/pal/linux/PALSplit.h33
-rw-r--r--compiler/luci-interpreter/pal/linux/PALSub.h35
-rw-r--r--compiler/luci-interpreter/pal/linux/pal.cmake15
-rw-r--r--compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst56
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALArgMax.h33
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALBatchToSpaceND.h37
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALConv2d.h70
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALDepthToSpace.h35
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALElu.h33
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALL2Normalize.h34
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALL2Pool2D.h33
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALLeakyRelu.h32
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALMul.h45
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALNeg.h32
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALResizeBilinear.h37
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h37
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALSoftmax.h62
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALSpaceToBatchND.h38
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALSpaceToDepth.h35
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALSub.h35
-rw-r--r--compiler/luci-interpreter/pal/mcu/pal.cmake17
-rw-r--r--compiler/luci-interpreter/src/BuddyMemoryManager.cpp96
-rw-r--r--compiler/luci-interpreter/src/BuddyMemoryManager.test.cpp69
-rw-r--r--compiler/luci-interpreter/src/CMakeLists.txt43
-rw-r--r--compiler/luci-interpreter/src/Interpreter.cpp18
-rw-r--r--compiler/luci-interpreter/src/SimpleMemoryManager.cpp51
-rw-r--r--compiler/luci-interpreter/src/StaticMemoryManager.cpp39
-rw-r--r--compiler/luci-interpreter/src/TestMemoryManager.cpp45
-rw-r--r--compiler/luci-interpreter/src/core/CMakeLists.txt12
-rw-r--r--compiler/luci-interpreter/src/core/Kernel.h4
-rw-r--r--compiler/luci-interpreter/src/core/RuntimeGraph.cpp44
-rw-r--r--compiler/luci-interpreter/src/core/RuntimeGraph.h6
-rw-r--r--compiler/luci-interpreter/src/core/RuntimeModule.h5
-rw-r--r--compiler/luci-interpreter/src/core/Tensor.cpp21
-rw-r--r--compiler/luci-interpreter/src/kernels/Add.test.cpp76
-rw-r--r--compiler/luci-interpreter/src/kernels/ArgMax.cpp10
-rw-r--r--compiler/luci-interpreter/src/kernels/ArgMax.test.cpp23
-rw-r--r--compiler/luci-interpreter/src/kernels/AveragePool2D.cpp28
-rw-r--r--compiler/luci-interpreter/src/kernels/AveragePool2D.h1
-rw-r--r--compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp84
-rw-r--r--compiler/luci-interpreter/src/kernels/BatchToSpaceND.cpp6
-rw-r--r--compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp28
-rw-r--r--compiler/luci-interpreter/src/kernels/CMakeLists.txt249
-rw-r--r--compiler/luci-interpreter/src/kernels/Cast.test.cpp185
-rw-r--r--compiler/luci-interpreter/src/kernels/Concatenation.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/Concatenation.test.cpp71
-rw-r--r--compiler/luci-interpreter/src/kernels/Conv2D.cpp141
-rw-r--r--compiler/luci-interpreter/src/kernels/Conv2D.h5
-rw-r--r--compiler/luci-interpreter/src/kernels/Conv2D.test.cpp292
-rw-r--r--compiler/luci-interpreter/src/kernels/DepthToSpace.cpp14
-rw-r--r--compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp18
-rw-r--r--compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp63
-rw-r--r--compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h1
-rw-r--r--compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp206
-rw-r--r--compiler/luci-interpreter/src/kernels/Div.cpp3
-rw-r--r--compiler/luci-interpreter/src/kernels/Div.test.cpp51
-rw-r--r--compiler/luci-interpreter/src/kernels/Elu.cpp6
-rw-r--r--compiler/luci-interpreter/src/kernels/Elu.test.cpp10
-rw-r--r--compiler/luci-interpreter/src/kernels/Equal.h4
-rw-r--r--compiler/luci-interpreter/src/kernels/Equal.test.cpp57
-rw-r--r--compiler/luci-interpreter/src/kernels/Exp.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/Exp.test.cpp6
-rw-r--r--compiler/luci-interpreter/src/kernels/Floor.test.cpp19
-rw-r--r--compiler/luci-interpreter/src/kernels/FloorDiv.test.cpp42
-rw-r--r--compiler/luci-interpreter/src/kernels/FullyConnected.cpp43
-rw-r--r--compiler/luci-interpreter/src/kernels/FullyConnected.h1
-rw-r--r--compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp101
-rw-r--r--compiler/luci-interpreter/src/kernels/Greater.h4
-rw-r--r--compiler/luci-interpreter/src/kernels/Greater.test.cpp68
-rw-r--r--compiler/luci-interpreter/src/kernels/GreaterEqual.h4
-rw-r--r--compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp68
-rw-r--r--compiler/luci-interpreter/src/kernels/If.cpp4
-rw-r--r--compiler/luci-interpreter/src/kernels/If.test.cpp76
-rw-r--r--compiler/luci-interpreter/src/kernels/InstanceNorm.test.cpp39
-rw-r--r--compiler/luci-interpreter/src/kernels/L2Normalize.cpp8
-rw-r--r--compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp20
-rw-r--r--compiler/luci-interpreter/src/kernels/L2Pool2D.cpp8
-rw-r--r--compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp61
-rw-r--r--compiler/luci-interpreter/src/kernels/LeakyRelu.cpp10
-rw-r--r--compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp23
-rw-r--r--compiler/luci-interpreter/src/kernels/Less.h4
-rw-r--r--compiler/luci-interpreter/src/kernels/Less.test.cpp68
-rw-r--r--compiler/luci-interpreter/src/kernels/LessEqual.h4
-rw-r--r--compiler/luci-interpreter/src/kernels/LessEqual.test.cpp68
-rw-r--r--compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp4
-rw-r--r--compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp49
-rw-r--r--compiler/luci-interpreter/src/kernels/LogSoftmax.cpp13
-rw-r--r--compiler/luci-interpreter/src/kernels/LogSoftmax.test.cpp33
-rw-r--r--compiler/luci-interpreter/src/kernels/LogicalAnd.test.cpp42
-rw-r--r--compiler/luci-interpreter/src/kernels/LogicalNot.test.cpp25
-rw-r--r--compiler/luci-interpreter/src/kernels/LogicalOr.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/LogicalOr.test.cpp42
-rw-r--r--compiler/luci-interpreter/src/kernels/Logistic.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/Logistic.test.cpp26
-rw-r--r--compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp28
-rw-r--r--compiler/luci-interpreter/src/kernels/Maximum.test.cpp27
-rw-r--r--compiler/luci-interpreter/src/kernels/Mean.cpp83
-rw-r--r--compiler/luci-interpreter/src/kernels/Mean.h7
-rw-r--r--compiler/luci-interpreter/src/kernels/Mean.test.cpp113
-rw-r--r--compiler/luci-interpreter/src/kernels/Minimum.test.cpp27
-rw-r--r--compiler/luci-interpreter/src/kernels/MirrorPad.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp17
-rw-r--r--compiler/luci-interpreter/src/kernels/Mul.cpp12
-rw-r--r--compiler/luci-interpreter/src/kernels/Mul.test.cpp43
-rw-r--r--compiler/luci-interpreter/src/kernels/Neg.cpp6
-rw-r--r--compiler/luci-interpreter/src/kernels/Neg.test.cpp6
-rw-r--r--compiler/luci-interpreter/src/kernels/NotEqual.h4
-rw-r--r--compiler/luci-interpreter/src/kernels/NotEqual.test.cpp57
-rw-r--r--compiler/luci-interpreter/src/kernels/PRelu.cpp12
-rw-r--r--compiler/luci-interpreter/src/kernels/PRelu.h2
-rw-r--r--compiler/luci-interpreter/src/kernels/PRelu.test.cpp124
-rw-r--r--compiler/luci-interpreter/src/kernels/Pack.test.cpp19
-rw-r--r--compiler/luci-interpreter/src/kernels/Pad.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/Pad.test.cpp18
-rw-r--r--compiler/luci-interpreter/src/kernels/PadV2.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/PadV2.test.cpp25
-rw-r--r--compiler/luci-interpreter/src/kernels/Pow.test.cpp54
-rw-r--r--compiler/luci-interpreter/src/kernels/Relu.cpp8
-rw-r--r--compiler/luci-interpreter/src/kernels/Relu.test.cpp44
-rw-r--r--compiler/luci-interpreter/src/kernels/Relu6.cpp8
-rw-r--r--compiler/luci-interpreter/src/kernels/Relu6.test.cpp38
-rw-r--r--compiler/luci-interpreter/src/kernels/Reshape.test.cpp27
-rw-r--r--compiler/luci-interpreter/src/kernels/ResizeBilinear.cpp6
-rw-r--r--compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp88
-rw-r--r--compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp6
-rw-r--r--compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp72
-rw-r--r--compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp9
-rw-r--r--compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp16
-rw-r--r--compiler/luci-interpreter/src/kernels/Slice.cpp13
-rw-r--r--compiler/luci-interpreter/src/kernels/Slice.test.cpp12
-rw-r--r--compiler/luci-interpreter/src/kernels/Softmax.cpp14
-rw-r--r--compiler/luci-interpreter/src/kernels/Softmax.test.cpp47
-rw-r--r--compiler/luci-interpreter/src/kernels/SpaceToBatchND.cpp6
-rw-r--r--compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp37
-rw-r--r--compiler/luci-interpreter/src/kernels/SpaceToDepth.cpp14
-rw-r--r--compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp7
-rw-r--r--compiler/luci-interpreter/src/kernels/Split.cpp12
-rw-r--r--compiler/luci-interpreter/src/kernels/Split.test.cpp12
-rw-r--r--compiler/luci-interpreter/src/kernels/SplitV.cpp89
-rw-r--r--compiler/luci-interpreter/src/kernels/SplitV.h49
-rw-r--r--compiler/luci-interpreter/src/kernels/SplitV.test.cpp112
-rw-r--r--compiler/luci-interpreter/src/kernels/Sqrt.test.cpp18
-rw-r--r--compiler/luci-interpreter/src/kernels/Square.test.cpp7
-rw-r--r--compiler/luci-interpreter/src/kernels/SquaredDifference.test.cpp19
-rw-r--r--compiler/luci-interpreter/src/kernels/Squeeze.test.cpp7
-rw-r--r--compiler/luci-interpreter/src/kernels/StridedSlice.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp29
-rw-r--r--compiler/luci-interpreter/src/kernels/Sub.cpp10
-rw-r--r--compiler/luci-interpreter/src/kernels/Sub.test.cpp51
-rw-r--r--compiler/luci-interpreter/src/kernels/Tanh.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/Tanh.test.cpp34
-rw-r--r--compiler/luci-interpreter/src/kernels/TestUtils.cpp5
-rw-r--r--compiler/luci-interpreter/src/kernels/TestUtils.h29
-rw-r--r--compiler/luci-interpreter/src/kernels/Transpose.cpp8
-rw-r--r--compiler/luci-interpreter/src/kernels/Transpose.test.cpp8
-rw-r--r--compiler/luci-interpreter/src/kernels/TransposeConv.cpp37
-rw-r--r--compiler/luci-interpreter/src/kernels/TransposeConv.h5
-rw-r--r--compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp120
-rw-r--r--compiler/luci-interpreter/src/kernels/Unpack.test.cpp9
-rw-r--r--compiler/luci-interpreter/src/kernels/Utils.cpp4
-rw-r--r--compiler/luci-interpreter/src/kernels/While.cpp11
-rw-r--r--compiler/luci-interpreter/src/kernels/While.test.cpp31
-rw-r--r--compiler/luci-interpreter/src/loader/CMakeLists.txt27
-rw-r--r--compiler/luci-interpreter/src/loader/GraphLoader.cpp12
-rw-r--r--compiler/luci-interpreter/src/loader/GraphLoader.h5
-rw-r--r--compiler/luci-interpreter/src/loader/KernelBuilder.cpp1244
-rw-r--r--compiler/luci-interpreter/src/loader/KernelBuilder.h12
-rw-r--r--compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp35
-rw-r--r--compiler/luci-interpreter/src/loader/KernelBuilderHelper.h19
-rw-r--r--compiler/luci-interpreter/src/loader/ModuleLoader.cpp9
-rw-r--r--compiler/luci-interpreter/src/loader/ModuleLoader.h5
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Add.cpp42
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp41
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp46
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp40
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Builders.h37
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Cast.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp44
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp54
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp41
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp49
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Div.cpp41
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Elu.cpp37
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Equal.cpp40
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Exp.cpp38
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Floor.cpp38
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp43
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Greater.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/If.cpp49
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp45
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp41
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp46
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp40
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Less.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp44
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp38
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp38
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Logistic.cpp38
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp46
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Maximum.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Mean.cpp63
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Minimum.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp42
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Mul.cpp42
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Neg.cpp38
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/PRelu.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Pack.cpp46
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Pad.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/PadV2.cpp40
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Pow.cpp40
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Relu.cpp38
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Relu6.cpp38
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Reshape.cpp40
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp43
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp48
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp38
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Slice.cpp41
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Softmax.cpp41
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp41
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp41
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Split.cpp42
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/SplitV.cpp43
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp38
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Square.cpp38
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp41
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp49
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Sub.cpp42
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Tanh.cpp38
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Transpose.cpp39
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp57
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Unpack.cpp44
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/While.cpp49
-rw-r--r--compiler/luci-micro/CMakeLists.txt57
-rw-r--r--compiler/luci-micro/README.md56
-rw-r--r--compiler/luci-micro/requires.cmake1
-rw-r--r--compiler/luci-micro/standalone/CMakeLists.txt20
-rw-r--r--compiler/luci-micro/standalone/Toolchain.cmake8
-rw-r--r--compiler/luci-pass-value-test/CMakeLists.txt2
-rw-r--r--compiler/luci-value-test/CMakeLists.txt2
-rw-r--r--compiler/luci-value-test/README.md4
-rwxr-xr-xcompiler/luci-value-test/luci_eval_verifier.py28
-rw-r--r--compiler/luci/CMakeLists.txt12
-rw-r--r--compiler/luci/env/CMakeLists.txt6
-rw-r--r--compiler/luci/env/include/luci/UserSettings.h1
-rw-r--r--compiler/luci/env/src/UserSettings.cpp6
-rw-r--r--compiler/luci/env/src/UserSettings.test.cpp24
-rw-r--r--compiler/luci/export/CMakeLists.txt7
-rw-r--r--compiler/luci/export/src/CircleExportMetadata.cpp31
-rw-r--r--compiler/luci/export/src/CircleOperationExporter.cpp20
-rw-r--r--compiler/luci/export/src/SerializedData.h11
-rw-r--r--compiler/luci/import/CMakeLists.txt7
-rw-r--r--compiler/luci/import/src/CircleImportMetadata.cpp51
-rw-r--r--compiler/luci/import/src/CircleImportMetadata.h6
-rw-r--r--compiler/luci/import/src/Importer.cpp20
-rw-r--r--compiler/luci/lang/CMakeLists.txt6
-rw-r--r--compiler/luci/lang/include/luci/IR/ExecutionPlanTable.h27
-rw-r--r--compiler/luci/log/CMakeLists.txt6
-rw-r--r--compiler/luci/logex/CMakeLists.txt6
-rw-r--r--compiler/luci/partition/CMakeLists.txt6
-rw-r--r--compiler/luci/pass/CMakeLists.txt15
-rw-r--r--compiler/luci/pass/include/luci/CircleOptimizer.h17
-rw-r--r--compiler/luci/pass/include/luci/Pass/ExpandBroadcastConstPass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/FoldDepthwiseConv2DPass.h38
-rw-r--r--compiler/luci/pass/include/luci/Pass/ForceQuantParamPass.h56
-rw-r--r--compiler/luci/pass/include/luci/Pass/FuseAddWithFullyConnectedPass.h37
-rw-r--r--compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h9
-rw-r--r--compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h9
-rw-r--r--compiler/luci/pass/include/luci/Pass/SubstituteSplitVToSplitPass.h37
-rw-r--r--compiler/luci/pass/src/CircleOptimizer.cpp147
-rw-r--r--compiler/luci/pass/src/CircleOptimizer.test.cpp46
-rw-r--r--compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp108
-rw-r--r--compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp111
-rw-r--r--compiler/luci/pass/src/ExpandBroadcastConstPass.cpp178
-rw-r--r--compiler/luci/pass/src/ExpandBroadcastConstPass.test.cpp148
-rw-r--r--compiler/luci/pass/src/FoldDepthwiseConv2DPass.cpp241
-rw-r--r--compiler/luci/pass/src/FoldDepthwiseConv2DPass.test.cpp127
-rw-r--r--compiler/luci/pass/src/ForceQuantParamPass.cpp77
-rw-r--r--compiler/luci/pass/src/ForceQuantParamPass.test.cpp184
-rw-r--r--compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp134
-rw-r--r--compiler/luci/pass/src/FuseAddWithFullyConnectedPass.test.cpp176
-rw-r--r--compiler/luci/pass/src/PropagateQuantParamPass.cpp8
-rw-r--r--compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp2
-rw-r--r--compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp37
-rw-r--r--compiler/luci/pass/src/QuantizedModelVerifier.test.cpp62
-rw-r--r--compiler/luci/pass/src/ResolveCustomOpAddPass.cpp4
-rw-r--r--compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.cpp4
-rw-r--r--compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.test.cpp5
-rw-r--r--compiler/luci/pass/src/ResolveCustomOpMatMulPass.cpp3
-rw-r--r--compiler/luci/pass/src/ResolveCustomOpMaxPoolWithArgmaxPass.cpp3
-rw-r--r--compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp131
-rw-r--r--compiler/luci/pass/src/SubstituteSplitVToSplitPass.test.cpp199
-rw-r--r--compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp13
-rw-r--r--compiler/luci/pass/src/VerifyQuantizedNodeChannelWiseGranularity.h13
-rw-r--r--compiler/luci/pass/src/VerifyQuantizedNodeLayerWiseGranularity.h13
-rw-r--r--compiler/luci/pass/src/VerifyQuantizedNodeS16Type.h20
-rw-r--r--compiler/luci/pass/src/VerifyQuantizedNodeU8Type.h20
-rw-r--r--compiler/luci/plan/CMakeLists.txt15
-rw-r--r--compiler/luci/plan/include/luci/Plan/CircleNodeExecutionPlan.h58
-rw-r--r--compiler/luci/plan/src/CircleNodeExecutionPlan.cpp77
-rw-r--r--compiler/luci/profile/CMakeLists.txt6
-rw-r--r--compiler/luci/requires.cmake1
-rw-r--r--compiler/luci/service/CMakeLists.txt6
-rw-r--r--compiler/luci/service/src/CircleTypeInferenceRule.cpp3
-rw-r--r--compiler/mio-circle/CMakeLists.txt2
-rw-r--r--compiler/mio-tflite/CMakeLists.txt12
-rw-r--r--compiler/mio-tflite260/CMakeLists.txt49
-rw-r--r--compiler/mio-tflite260/README.md3
-rw-r--r--compiler/mio-tflite260/example.cpp41
-rw-r--r--compiler/mir/src/mir_tflite_importer/CMakeLists.txt2
-rw-r--r--compiler/one-cmds/CMakeLists.txt1
-rw-r--r--compiler/one-cmds/conv_mixin_1.8.0.patch11
-rw-r--r--compiler/one-cmds/how-to-use-one-commands.txt5
-rw-r--r--compiler/one-cmds/one-codegen6
-rw-r--r--compiler/one-cmds/one-prepare-venv26
-rw-r--r--compiler/one-cmds/one-profile9
-rw-r--r--compiler/one-cmds/one-quantize67
-rw-r--r--compiler/one-cmds/tests/one-import_neg_002.test6
-rw-r--r--compiler/one-cmds/tests/one-import_neg_006.test7
-rw-r--r--compiler/one-cmds/tests/one-quantize_005.test46
-rw-r--r--compiler/one-cmds/tests/one-quantize_006.test49
-rw-r--r--compiler/one-cmds/tests/one-quantize_neg_018.test49
-rw-r--r--compiler/one-cmds/tests/onecc_022.cfg18
-rw-r--r--compiler/one-cmds/tests/onecc_022.test42
-rw-r--r--compiler/one-cmds/tests/prepare_test_materials.sh10
-rw-r--r--compiler/one-cmds/utils.py72
-rw-r--r--compiler/pota-quantization-value-test/CMakeLists.txt4
-rw-r--r--compiler/pota-quantization-value-test/requires.cmake1
-rw-r--r--compiler/tfl-inspect/CMakeLists.txt2
-rw-r--r--compiler/tfl-inspect/requires.cmake2
-rw-r--r--compiler/tfl-inspect/src/Reader.cpp21
-rw-r--r--compiler/tfl-inspect/src/Reader.h1
-rw-r--r--compiler/tfl-verify/CMakeLists.txt2
-rw-r--r--compiler/tfl-verify/requires.cmake2
-rw-r--r--compiler/tflchef/CMakeLists.txt6
-rw-r--r--compiler/tflchef/core/CMakeLists.txt2
-rw-r--r--compiler/tflchef/core/src/CustomOp/AddV2.cpp2
-rw-r--r--compiler/tflchef/core/src/CustomOp/All.cpp2
-rw-r--r--compiler/tflchef/core/src/CustomOp/BatchMatMulV2.cpp2
-rw-r--r--compiler/tflchef/core/src/CustomOp/BroadcastTo.cpp2
-rw-r--r--compiler/tflchef/core/src/CustomOp/MatMul.cpp2
-rw-r--r--compiler/tflchef/core/src/CustomOp/MatrixBandPart.cpp2
-rw-r--r--compiler/tflchef/core/src/CustomOp/MaxPoolWithArgmax.cpp2
-rw-r--r--compiler/tflchef/core/src/ModelChef.cpp8
-rw-r--r--compiler/tflchef/requires.cmake2
-rw-r--r--compiler/tflchef/tflite/CMakeLists.txt2
-rw-r--r--compiler/tflchef/tflite/src/TFliteImport.cpp20
-rw-r--r--compiler/tflchef/tflite/src/TFliteImport.h1
-rw-r--r--compiler/tfldump/CMakeLists.txt10
-rw-r--r--compiler/tfldump/requires.cmake2
-rw-r--r--compiler/tfldump/src/Dump.cpp38
-rw-r--r--compiler/tfldump/src/Read.cpp19
-rw-r--r--compiler/tfldump/src/Read.h4
-rw-r--r--compiler/tflite2circle/CMakeLists.txt5
-rw-r--r--compiler/tflite2circle/driver/Driver.cpp6
-rw-r--r--compiler/tflite2circle/include/CircleModel.h9
-rw-r--r--compiler/tflite2circle/include/TFLModel.h7
-rw-r--r--compiler/tflite2circle/requires.cmake3
-rw-r--r--compiler/tflite2circle/src/CircleModel.cpp52
-rw-r--r--compiler/tflite2circle/src/TFLModel.cpp26
-rw-r--r--compiler/vconone/CMakeLists.txt2
-rw-r--r--compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp8
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl292
-rw-r--r--docs/conf.py2
-rw-r--r--docs/release/1.18/index.rst13
-rw-r--r--docs/release/1.18/release-note-1.18.0.md11
-rw-r--r--infra/cmake/modules/ExternalSourceTools.cmake8
-rw-r--r--infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake13
-rw-r--r--infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfigVersion.cmake10
-rw-r--r--infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake118
-rw-r--r--infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfigVersion.cmake10
-rw-r--r--infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake118
-rw-r--r--infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfigVersion.cmake10
-rw-r--r--infra/cmake/packages/FlatBuffersConfig.cmake24
-rw-r--r--infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake13
-rw-r--r--infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfigVersion.cmake10
-rw-r--r--infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfig.cmake21
-rw-r--r--infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfigVersion.cmake10
-rw-r--r--infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake20
-rw-r--r--infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfigVersion.cmake10
-rw-r--r--infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake20
-rw-r--r--infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfigVersion.cmake10
-rw-r--r--infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake18
-rw-r--r--infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfigVersion.cmake10
-rw-r--r--infra/debian/compiler/changelog6
-rw-r--r--infra/debian/compiler/one-compiler.install1
-rw-r--r--infra/debian/compiler/one-compiler.links1
-rwxr-xr-xinfra/debian/compiler/rules2
-rw-r--r--infra/debian/runtime/changelog6
-rw-r--r--infra/nncc/CMakeLists.txt5
-rw-r--r--infra/nncc/command/utcount2
-rw-r--r--infra/packaging/build2
-rw-r--r--infra/packaging/preset/2021091055
-rw-r--r--infra/packaging/preset/20210910_windows67
-rw-r--r--infra/packaging/res/tf2nnpkg.20210910109
-rw-r--r--infra/scripts/compiler_modules.sh2
-rwxr-xr-xinfra/scripts/docker_collect_nnpkg_resources.sh2
-rw-r--r--packaging/nnfw.spec4
-rw-r--r--res/TensorFlowLiteRecipes/PadV2_001/test.recipe68
-rw-r--r--res/TensorFlowLiteRecipes/PadV2_001/test.rule8
-rw-r--r--res/TensorFlowLiteSchema/2.6.0/schema.fbs1240
-rw-r--r--res/TensorFlowLiteSchema/SCHEMA.lst1
-rw-r--r--runtime/contrib/android/api/build.gradle2
-rw-r--r--runtime/libs/ndarray/CMakeLists.txt23
-rw-r--r--runtime/libs/ndarray/example/CMakeLists.txt4
-rw-r--r--runtime/libs/ndarray/example/example_array.cpp76
-rw-r--r--runtime/libs/ndarray/example/example_no_array.cpp85
-rw-r--r--runtime/libs/ndarray/include/ndarray/Array.h195
-rw-r--r--runtime/libs/ndarray/include/ndarray/Common.h22
-rw-r--r--runtime/libs/ndarray/include/ndarray/ContiguousSpan.h108
-rw-r--r--runtime/libs/ndarray/include/ndarray/Shape.h66
-rw-r--r--runtime/libs/ndarray/src/Array.cpp27
-rw-r--r--runtime/libs/ndarray/src/ContiguousSpan.cpp31
-rw-r--r--runtime/libs/ndarray/src/detail/cxx14.h67
-rw-r--r--runtime/libs/ndarray/test/CMakeLists.txt18
-rw-r--r--runtime/libs/ndarray/test/ndarray_test.cpp122
-rw-r--r--runtime/onert/api/include/nnfw_version.h2
-rw-r--r--runtime/onert/backend/cpu/CMakeLists.txt1
-rw-r--r--runtime/onert/backend/cpu/KernelGenerator.cc46
-rw-r--r--runtime/onert/backend/cpu/KernelGenerator.h1
-rw-r--r--runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc306
-rw-r--r--runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.h105
-rw-r--r--runtime/onert/core/include/compiler/StaticShapeInferer.h1
-rw-r--r--runtime/onert/core/include/exec/DynamicShapeInferer.h1
-rw-r--r--runtime/onert/core/include/ir/Operations.Include.h1
-rw-r--r--runtime/onert/core/include/ir/Operations.lst1
-rw-r--r--runtime/onert/core/include/ir/operation/DetectionPostProcess.h90
-rw-r--r--runtime/onert/core/src/compiler/StaticShapeInferer.cc24
-rw-r--r--runtime/onert/core/src/exec/DynamicShapeInferer.cc8
-rw-r--r--runtime/onert/core/src/ir/OperationValidator.cc8
-rw-r--r--runtime/onert/core/src/ir/OperationValidator.h1
-rw-r--r--runtime/onert/core/src/ir/operation/DetectionPostProcess.cc37
-rw-r--r--runtime/onert/frontend/base_loader/include/base_loader.h47
-rw-r--r--tests/nnfw_api/src/CircleGen.cc56
-rw-r--r--tests/nnfw_api/src/CircleGen.h13
-rw-r--r--tests/nnfw_api/src/one_op_tests/ArgMinMax.cc85
-rw-r--r--tests/nnfw_api/src/one_op_tests/AveragePool2D.cc109
-rw-r--r--tests/nnfw_api/src/one_op_tests/Concat.cc71
-rw-r--r--tests/nnfw_api/src/one_op_tests/DepthToSpace.cc15
-rw-r--r--tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc114
-rw-r--r--tests/nnfw_api/src/one_op_tests/DetectionPostProcess.cc74
-rw-r--r--tests/nnfw_api/src/one_op_tests/Pad.cc66
-rw-r--r--tests/nnfw_api/src/one_op_tests/Slice.cc102
-rw-r--r--tests/nnfw_api/src/one_op_tests/Softmax.cc43
-rwxr-xr-xtools/release_tool/onert_version.sh2
504 files changed, 17146 insertions, 3618 deletions
diff --git a/.ahub/tcchecker-tca/config.yaml b/.ahub/tcchecker-tca/config.yaml
index 9c0a8d881..86d272d8a 100644
--- a/.ahub/tcchecker-tca/config.yaml
+++ b/.ahub/tcchecker-tca/config.yaml
@@ -25,21 +25,6 @@ test:
any: true
- extension: cc
any: true
- - excludes :
- - DepthwiseConv2D.cc
- - ArgMinMax.cc
- - AveragePool2D.cc
- - Concat.cc
- - DepthToSpace.cc
- - DepthwiseConv2D.cc
- - Fill.cc
- - If.cc
- - Pad.cc
- - Reduce.cc
- - ResizeBilinear.c
- - Slice.cc
- - Softmax.cc
- - While.cc
testCase:
- condition:
- functionName:
diff --git a/compiler/arser/include/arser/arser.h b/compiler/arser/include/arser/arser.h
index f2a7a2b85..1703e421e 100644
--- a/compiler/arser/include/arser/arser.h
+++ b/compiler/arser/include/arser/arser.h
@@ -238,6 +238,18 @@ public:
return *this;
}
+ Argument &accumulated(void)
+ {
+ _is_accumulated = true;
+ return *this;
+ }
+
+ Argument &accumulated(bool value)
+ {
+ _is_accumulated = value;
+ return *this;
+ }
+
Argument &help(std::string help_message)
{
_help_message = help_message;
@@ -296,7 +308,9 @@ private:
std::function<void(void)> _func;
uint32_t _nargs{1};
bool _is_required{false};
+ bool _is_accumulated{false};
std::vector<std::string> _values;
+ std::vector<std::vector<std::string>> _accum_values;
friend class Arser;
friend std::ostream &operator<<(std::ostream &, const Arser &);
@@ -403,6 +417,8 @@ public:
throw std::runtime_error("Invalid arguments. Positional argument must always be required.");
}
}
+ // TODO Accumulated arguments shouldn't be enabled for positional arguments.
+ // TODO Accumulated arguments shouldn't be enabled for optional arguments whose `nargs` == 0.
}
void parse(int argc, char **argv)
@@ -475,6 +491,11 @@ public:
"You must have missed some argument.");
arg->second->_values.emplace_back(argv[c++]);
}
+ // accumulate values
+ if (arg->second->_is_accumulated)
+ {
+ arg->second->_accum_values.emplace_back(arg->second->_values);
+ }
if (arg->second->_nargs == 0)
{
// TODO std::boolalpha for true or false
@@ -493,6 +514,9 @@ public:
if (arg == _arg_map.end())
return false;
+ if (arg->second->_is_accumulated)
+ return arg->second->_accum_values.size() > 0 ? true : false;
+
return arg->second->_values.size() > 0 ? true : false;
}
@@ -500,6 +524,9 @@ public:
template <typename T> std::vector<T> get_impl(const std::string &arg_name, std::vector<T> *);
+ template <typename T>
+ std::vector<std::vector<T>> get_impl(const std::string &arg_name, std::vector<std::vector<T>> *);
+
template <typename T> T get(const std::string &arg_name);
friend std::ostream &operator<<(std::ostream &stream, const Arser &parser)
@@ -617,6 +644,12 @@ template <typename T> T Arser::get_impl(const std::string &arg_name, T *)
"There is no argument you are looking for: " +
arg_name);
+ if (arg->second->_is_accumulated)
+ throw std::runtime_error(
+ "Type mismatch. "
+ "You called get using a type different from the one you specified."
+ "Accumulated argument is returned as std::vector of the specified type");
+
if (arg->second->_type != TypeName<T>::Get())
throw std::runtime_error("Type mismatch. "
"You called get() method with a type different "
@@ -640,6 +673,22 @@ template <typename T> std::vector<T> Arser::get_impl(const std::string &arg_name
"There is no argument you are looking for: " +
arg_name);
+ // Accumulated arguments with scalar type (e.g., STR)
+ if (arg->second->_is_accumulated)
+ {
+ if (arg->second->_type != TypeName<T>::Get())
+ throw std::runtime_error("Type mismatch. "
+ "You called get using a type different from the one you specified.");
+
+ std::vector<T> data;
+ for (auto values : arg->second->_accum_values)
+ {
+ assert(values.size() == 1);
+ data.emplace_back(internal::lexical_cast<T>(values[0]));
+ }
+ return data;
+ }
+
if (arg->second->_type != TypeName<std::vector<T>>::Get())
throw std::runtime_error("Type mismatch. "
"You called get using a type different from the one you specified.");
@@ -650,6 +699,39 @@ template <typename T> std::vector<T> Arser::get_impl(const std::string &arg_name
return data;
}
+// Accumulated arguments with vector type (e.g., STR_VEC)
+template <typename T>
+std::vector<std::vector<T>> Arser::get_impl(const std::string &arg_name,
+ std::vector<std::vector<T>> *)
+{
+ auto arg = _arg_map.find(arg_name);
+ if (arg == _arg_map.end())
+ throw std::runtime_error("Invalid argument. "
+ "There is no argument you are looking for: " +
+ arg_name);
+
+ if (not arg->second->_is_accumulated)
+ throw std::runtime_error("Type mismatch. "
+ "You called get using a type different from the one you specified.");
+
+ if (arg->second->_type != TypeName<std::vector<T>>::Get())
+ throw std::runtime_error(
+ "Type mismatch. "
+ "You called get using a type different from the one you specified."
+ "Accumulated argument is returned as std::vector of the specified type");
+
+ std::vector<std::vector<T>> result;
+ for (auto values : arg->second->_accum_values)
+ {
+ std::vector<T> data;
+ std::transform(values.begin(), values.end(), std::back_inserter(data),
+ [](std::string str) -> T { return internal::lexical_cast<T>(str); });
+ result.emplace_back(data);
+ }
+
+ return result;
+}
+
template <typename T> T Arser::get(const std::string &arg_name)
{
return get_impl(arg_name, static_cast<T *>(nullptr));
diff --git a/compiler/arser/tests/arser.test.cpp b/compiler/arser/tests/arser.test.cpp
index b37d0dec3..4e88f0cb7 100644
--- a/compiler/arser/tests/arser.test.cpp
+++ b/compiler/arser/tests/arser.test.cpp
@@ -93,7 +93,7 @@ TEST(BasicTest, OptionalArgument)
EXPECT_THROW(arser.get<bool>("--volume"), std::runtime_error);
}
-TEST(BasicTest, NonRequiredOptionalArgument)
+TEST(BasicTest, NonRequiredOptionalArgument_NEG)
{
/* arrange */
Arser arser;
@@ -111,7 +111,7 @@ TEST(BasicTest, NonRequiredOptionalArgument)
EXPECT_THROW(arser.get<int>("--weight"), std::runtime_error);
}
-TEST(BasicTest, RequiredOptionalArgument)
+TEST(BasicTest, RequiredOptionalArgument_NEG)
{
/* arrange */
Arser arser;
@@ -395,7 +395,7 @@ TEST(BasicTest, shortMultipleOption)
EXPECT_EQ("I/am/out.put", arser.get<std::string>("--output_path"));
}
-TEST(BasicTest, OptWithRequiredDuplicate)
+TEST(BasicTest, OptWithRequiredDuplicate_NEG)
{
/* arrange */
Arser arser;
@@ -441,3 +441,61 @@ TEST(BasicTest, OptWithNonRequiredDuplicate)
EXPECT_TRUE(arser["--output_path"]);
EXPECT_EQ("I/am/out.put", arser.get<std::string>("--output_path"));
}
+
+TEST(BasicTest, AccumulateVectorOptions)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("--specify").nargs(3).accumulated(true).type(arser::DataType::STR_VEC);
+
+ Prompt prompt("./driver --specify a b c --specify 1 2 3");
+ /* act */
+ arser.parse(prompt.argc(), prompt.argv());
+ /* assert */
+ EXPECT_TRUE(arser["--specify"]);
+
+ auto specify = arser.get<std::vector<std::vector<std::string>>>("--specify");
+ auto first = specify[0];
+ EXPECT_EQ("a", first.at(0));
+ EXPECT_EQ("b", first.at(1));
+ EXPECT_EQ("c", first.at(2));
+ auto second = specify[1];
+ EXPECT_EQ("1", second.at(0));
+ EXPECT_EQ("2", second.at(1));
+ EXPECT_EQ("3", second.at(2));
+}
+
+TEST(BasicTest, AccumulateScalarOptions)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("--specify").nargs(1).accumulated(true).type(arser::DataType::FLOAT);
+
+ Prompt prompt("./driver --specify 1 --specify 2");
+ /* act */
+ arser.parse(prompt.argc(), prompt.argv());
+ /* assert */
+ EXPECT_TRUE(arser["--specify"]);
+
+ auto specify = arser.get<std::vector<float>>("--specify");
+ EXPECT_EQ(1, specify.at(0));
+ EXPECT_EQ(2, specify.at(1));
+}
+
+TEST(BasicTest, AccumulateScalarOptions_WrongType_NEG)
+{
+ /* arrange */
+ Arser arser;
+
+ arser.add_argument("--specify").nargs(1).accumulated(true).type(arser::DataType::FLOAT);
+
+ Prompt prompt("./driver --specify 1 --specify 2");
+ /* act */
+ arser.parse(prompt.argc(), prompt.argv());
+ /* assert */
+ EXPECT_TRUE(arser["--specify"]);
+
+ EXPECT_THROW(arser.get<float>("--specify"), std::runtime_error);
+}
diff --git a/compiler/circle-opselector/CMakeLists.txt b/compiler/circle-opselector/CMakeLists.txt
new file mode 100644
index 000000000..93ab84c09
--- /dev/null
+++ b/compiler/circle-opselector/CMakeLists.txt
@@ -0,0 +1,36 @@
+set(DRIVER "driver/Driver.cpp")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_executable(circle-opselector ${DRIVER} ${SOURCES})
+target_include_directories(circle-opselector PRIVATE src)
+target_link_libraries(circle-opselector foder)
+target_link_libraries(circle-opselector safemain)
+target_link_libraries(circle-opselector loco)
+target_link_libraries(circle-opselector luci_import)
+target_link_libraries(circle-opselector luci_export)
+target_link_libraries(circle-opselector arser)
+target_link_libraries(circle-opselector vconone)
+target_link_libraries(circle-opselector luci_service)
+target_link_libraries(circle-opselector luci_profile)
+
+install(TARGETS circle-opselector DESTINATION bin)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(circle-opselector-test ${TESTS} ${SOURCES} ${DRIVER})
+target_include_directories(circle-opselector-test PRIVATE src)
+target_link_libraries(circle-opselector-test foder)
+target_link_libraries(circle-opselector-test loco)
+target_link_libraries(circle-opselector-test luci_import)
+target_link_libraries(circle-opselector-test luci_export)
+target_link_libraries(circle-opselector-test arser)
+target_link_libraries(circle-opselector-test vconone)
+target_link_libraries(circle-opselector-test luci_service)
+target_link_libraries(circle-opselector-test luci_profile)
diff --git a/compiler/circle-opselector/README.md b/compiler/circle-opselector/README.md
new file mode 100644
index 000000000..c06899ab5
--- /dev/null
+++ b/compiler/circle-opselector/README.md
@@ -0,0 +1,21 @@
+# circle-opselector
+
+`circle-opselector` is a tool for creating new circle models by selecting nodes from a model.
+
+## Example
+
+### 1. Select from location numbers
+
+```bash
+./circle-opselector --by_id "1-3,5" input.circle output.circle
+```
+
+Then, output.circle, which contains nodes 1, 2, 3 and 5, will be created.
+
+### 2. Select from node names
+
+```bash
+./circle-opselector --by_name "Add_1,Sub_1,Concat_2" input.circle output.circle
+```
+
+Then, output.circle, which contains nodes Add_1, Sub_1 and Concat_2, will be created.
diff --git a/compiler/circle-opselector/driver/Driver.cpp b/compiler/circle-opselector/driver/Driver.cpp
new file mode 100644
index 000000000..a1ace4f58
--- /dev/null
+++ b/compiler/circle-opselector/driver/Driver.cpp
@@ -0,0 +1,274 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModuleIO.h"
+
+#include <luci/Profile/CircleNodeID.h>
+
+#include <arser/arser.h>
+#include <vconone/vconone.h>
+
+#include <iostream>
+#include <string>
+#include <vector>
+#include <algorithm>
+#include <cctype>
+#include <numeric>
+#include <sstream>
+
+void print_version(void)
+{
+ std::cout << "circle-opselector version " << vconone::get_string() << std::endl;
+ std::cout << vconone::get_copyright() << std::endl;
+}
+
+std::vector<std::string> split_into_vector(const std::string &str, const char &delim)
+{
+ std::vector<std::string> ret;
+ std::istringstream is(str);
+ for (std::string item; std::getline(is, item, delim);)
+ {
+ ret.push_back(item);
+ }
+
+ // remove empty string
+ ret.erase(std::remove_if(ret.begin(), ret.end(), [](const std::string &s) { return s.empty(); }),
+ ret.end());
+
+ return ret;
+}
+
+bool is_number(const std::string &s)
+{
+ return !s.empty() && std::find_if(s.begin(), s.end(),
+ [](unsigned char c) { return !std::isdigit(c); }) == s.end();
+}
+
+bool is_number(const std::vector<std::string> &vec)
+{
+ for (const auto &s : vec)
+ {
+ if (not::is_number(s))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+/**
+ * @brief Segmentation function for the user's '--by_id' input
+ *
+ * @note This function tokenizes the input string.
+ * First, the input is split on ','; then, if a token contains '-', it is split once more on '-'.
+ * For example, the input '12,34,56' is divided into [12,34,56],
+ * and '1-2,34,56' is divided into [[1,2],34,56].
+ * '-' denotes a range, so '2-7' means every integer from 2 to 7.
+ */
+std::vector<uint32_t> split_id_input(const std::string &str)
+{
+ std::vector<uint32_t> by_id;
+
+ // tokenize colon-separated string
+ auto colon_tokens = ::split_into_vector(str, ',');
+ if (colon_tokens.empty()) // input empty line like "".
+ {
+ std::cerr << "ERROR: Nothing was entered." << std::endl;
+ exit(EXIT_FAILURE);
+ }
+ for (const auto &ctok : colon_tokens)
+ {
+ auto dash_tokens = ::split_into_vector(ctok, '-');
+ if (not::is_number(dash_tokens))
+ {
+ std::cerr << "ERROR: To select operator by id, please use these args: [0-9], '-', ','"
+ << std::endl;
+ exit(EXIT_FAILURE);
+ }
+ // convert string into integer
+ std::vector<uint32_t> int_tokens;
+ try
+ {
+ std::transform(dash_tokens.begin(), dash_tokens.end(), std::back_inserter(int_tokens),
+ [](const std::string &str) { return static_cast<uint32_t>(std::stoi(str)); });
+ }
+ catch (const std::out_of_range &)
+ {
+ // if the input is a big integer like '123467891234', stoi throws this exception.
+ std::cerr << "ERROR: Argument is out of range." << std::endl;
+ exit(EXIT_FAILURE);
+ }
+ catch (...)
+ {
+ std::cerr << "ERROR: Unknown error" << std::endl;
+ exit(EXIT_FAILURE);
+ }
+
+ switch (int_tokens.size())
+ {
+ case 0: // inputs like "-"
+ {
+ std::cerr << "ERROR: Nothing was entered" << std::endl;
+ exit(EXIT_FAILURE);
+ }
+ case 1: // inputs like "1", "2"
+ {
+ by_id.push_back(int_tokens.at(0));
+ break;
+ }
+ case 2: // inputs like "1-2", "11-50"
+ {
+ for (uint32_t i = int_tokens.at(0); i <= int_tokens.at(1); i++)
+ {
+ by_id.push_back(i);
+ }
+ break;
+ }
+ default: // inputs like "1-2-3"
+ {
+ std::cerr << "ERROR: Too many '-' in str." << std::endl;
+ exit(EXIT_FAILURE);
+ }
+ }
+ }
+
+ return by_id;
+}
+
+std::vector<std::string> split_name_input(const std::string &str)
+{
+ return ::split_into_vector(str, ',');
+}
+
+int entry(int argc, char **argv)
+{
+ // TODO Add new option names!
+
+ arser::Arser arser("circle-opselector provides selecting operations in circle model");
+
+ arser.add_argument("--version")
+ .nargs(0)
+ .default_value(false)
+ .help("Show version information and exit")
+ .exit_with(print_version);
+
+ // TODO Add new options!
+
+ arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
+ arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
+
+ // select option
+ arser.add_argument("--by_id")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("Input operation id to select nodes.");
+ arser.add_argument("--by_name")
+ .nargs(1)
+ .type(arser::DataType::STR)
+ .help("Input operation name to select nodes.");
+
+ try
+ {
+ arser.parse(argc, argv);
+ }
+ catch (const std::runtime_error &err)
+ {
+ std::cerr << err.what() << std::endl;
+ std::cout << arser;
+ return EXIT_FAILURE;
+ }
+
+ std::string input_path = arser.get<std::string>("input");
+ std::string output_path = arser.get<std::string>("output");
+
+ std::string operator_input;
+
+ std::vector<uint32_t> by_id;
+ std::vector<std::string> by_name;
+
+ if (!arser["--by_id"] && !arser["--by_name"] || arser["--by_id"] && arser["--by_name"])
+ {
+ std::cerr << "ERROR: Either option '--by_id' or '--by_name' must be specified" << std::endl;
+ std::cerr << arser;
+ return EXIT_FAILURE;
+ }
+
+ if (arser["--by_id"])
+ {
+ operator_input = arser.get<std::string>("--by_id");
+ by_id = split_id_input(operator_input);
+ }
+ if (arser["--by_name"])
+ {
+ operator_input = arser.get<std::string>("--by_name");
+ by_name = split_name_input(operator_input);
+ }
+
+ // Import original circle file.
+ auto module = opselector::getModule(input_path);
+
+ // Select nodes from user input.
+ std::vector<const luci::CircleNode *> selected_nodes;
+
+ // put selected nodes into vector.
+ if (by_id.size())
+ {
+ loco::Graph *graph = module.get()->graph(0); // get main subgraph.
+
+ for (auto node : loco::all_nodes(graph))
+ {
+ auto cnode = loco::must_cast<const luci::CircleNode *>(node);
+
+ try
+ {
+ auto node_id = luci::get_node_id(cnode); // if the node is not an operator, this throws runtime_error
+
+ for (auto selected_id : by_id)
+ if (selected_id == node_id) // find the selected id
+ selected_nodes.emplace_back(cnode);
+ }
+ catch (std::runtime_error)
+ {
+ continue;
+ }
+ }
+ }
+ if (by_name.size())
+ {
+ loco::Graph *graph = module.get()->graph(0); // get main subgraph.
+
+ for (auto node : loco::all_nodes(graph))
+ {
+ auto cnode = loco::must_cast<const luci::CircleNode *>(node);
+ std::string node_name = cnode->name();
+
+ for (auto selected_name : by_name)
+ if (selected_name.compare(node_name) == 0) // find the selected name
+ selected_nodes.emplace_back(cnode);
+ }
+ }
+ if (selected_nodes.size() == 0)
+ {
+ std::cerr << "ERROR: No operator selected" << std::endl;
+ exit(EXIT_FAILURE);
+ }
+ // TODO implement node selections
+
+ // Export to output Circle file
+ assert(opselector::exportModule(module.get(), output_path));
+
+ return 0;
+}
diff --git a/compiler/circle-opselector/requires.cmake b/compiler/circle-opselector/requires.cmake
new file mode 100644
index 000000000..dcdbcbb68
--- /dev/null
+++ b/compiler/circle-opselector/requires.cmake
@@ -0,0 +1,6 @@
+require("foder")
+require("loco")
+require("safemain")
+require("luci")
+require("arser")
+require("vconone")
diff --git a/compiler/circle-opselector/src/Driver.test.cpp b/compiler/circle-opselector/src/Driver.test.cpp
new file mode 100644
index 000000000..6e569085e
--- /dev/null
+++ b/compiler/circle-opselector/src/Driver.test.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Driver.test.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+TEST(DriverTest, NoArg_NEG)
+{
+ Argv<1> argv;
+ argv.add("circle-opselector");
+
+ ::testing::internal::CaptureStderr();
+ ::testing::internal::CaptureStdout();
+ int result = entry(1, argv.argv());
+ ::testing::internal::GetCapturedStdout();
+ ASSERT_EQ(EXIT_FAILURE, result);
+}
+
+TEST(DriverTest, Wrong_ID_NEG)
+{
+ std::string str1 = "1";
+ std::string empty = "";
+ std::string no_integer = "1531538X5";
+
+ ASSERT_EQ(true, is_number(str1));
+ ASSERT_EQ(false, is_number(empty));
+ ASSERT_EQ(false, is_number(no_integer));
+}
+
+TEST(DriverTest, Split)
+{
+ std::vector<uint32_t> vec1;
+ std::vector<uint32_t> vec2;
+
+ std::string hyphen = "1-3,8-10";
+ std::string comma = "1,2,3";
+
+ vec1.push_back(1);
+ vec1.push_back(2);
+ vec1.push_back(3);
+ vec1.push_back(8);
+ vec1.push_back(9);
+ vec1.push_back(10);
+
+ vec2.push_back(1);
+ vec2.push_back(2);
+ vec2.push_back(3);
+
+ ASSERT_EQ(vec1, split_id_input(hyphen));
+ ASSERT_EQ(vec2, split_id_input(comma));
+}
diff --git a/compiler/circle-opselector/src/Driver.test.h b/compiler/circle-opselector/src/Driver.test.h
new file mode 100644
index 000000000..06f151649
--- /dev/null
+++ b/compiler/circle-opselector/src/Driver.test.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_OPSELECTOR_DRIVER_TEST_H__
+#define __CIRCLE_OPSELECTOR_DRIVER_TEST_H__
+
+#include <vector>
+#include <string>
+
+int entry(int argc, char **argv);
+bool is_number(const std::string &s);
+std::vector<uint32_t> split_id_input(const std::string &str);
+
+#endif // __CIRCLE_OPSELECTOR_DRIVER_TEST_H__
diff --git a/compiler/circle-opselector/src/ModuleIO.cpp b/compiler/circle-opselector/src/ModuleIO.cpp
new file mode 100644
index 000000000..46f45ceb0
--- /dev/null
+++ b/compiler/circle-opselector/src/ModuleIO.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModuleIO.h"
+
+#include <foder/FileLoader.h>
+
+#include <luci/Importer.h>
+#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
+
+#include <iostream>
+
+namespace opselector
+{
+
+std::unique_ptr<luci::Module> getModule(std::string &input_path)
+{
+ // Load model from the file
+ foder::FileLoader file_loader{input_path};
+ std::vector<char> model_data = file_loader.load();
+
+ // Verify flatbuffers
+ flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
+ if (!circle::VerifyModelBuffer(verifier))
+ {
+ std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
+ exit(EXIT_FAILURE);
+ }
+
+ const circle::Model *circle_model = circle::GetModel(model_data.data());
+ if (circle_model == nullptr)
+ {
+ std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
+ exit(EXIT_FAILURE);
+ }
+
+ // Import from input Circle file
+ luci::Importer importer;
+
+ return importer.importModule(circle_model);
+}
+
+bool exportModule(luci::Module *module, std::string &output_path)
+{
+ luci::CircleExporter exporter;
+
+ luci::CircleFileExpContract contract(module, output_path);
+
+ if (!exporter.invoke(&contract))
+ {
+ std::cerr << "ERROR: Failed to export '" << output_path << "'" << std::endl;
+ return false;
+ }
+
+ return true;
+}
+
+} // namespace opselector
diff --git a/compiler/circle-opselector/src/ModuleIO.h b/compiler/circle-opselector/src/ModuleIO.h
new file mode 100644
index 000000000..39c704bf3
--- /dev/null
+++ b/compiler/circle-opselector/src/ModuleIO.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_OPSELECTOR_MODULEIO_H__
+#define __CIRCLE_OPSELECTOR_MODULEIO_H__
+
+#include <luci/IR/Module.h>
+
+#include <string>
+#include <memory>
+
+namespace opselector
+{
+
+std::unique_ptr<luci::Module> getModule(std::string &input_path);
+bool exportModule(luci::Module *module, std::string &output_path);
+
+} // namespace opselector
+
+#endif // __CIRCLE_OPSELECTOR_MODULEIO_H__
diff --git a/compiler/circle-opselector/src/ModuleIO.test.cpp b/compiler/circle-opselector/src/ModuleIO.test.cpp
new file mode 100644
index 000000000..a1e5c2070
--- /dev/null
+++ b/compiler/circle-opselector/src/ModuleIO.test.cpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModuleIO.h"
+
+#include <gtest/gtest.h>
+
+TEST(ModuleIOTest, Export_nullptr)
+{
+ std::string output_path = "./test.out.circle";
+
+ ASSERT_EQ(false, opselector::exportModule(nullptr, output_path));
+}
diff --git a/compiler/circle-opselector/src/TestHelper.h b/compiler/circle-opselector/src/TestHelper.h
new file mode 100644
index 000000000..966e2b219
--- /dev/null
+++ b/compiler/circle-opselector/src/TestHelper.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_OPSELECTOR_TEST_HELPER_H__
+#define __CIRCLE_OPSELECTOR_TEST_HELPER_H__
+
+#include <cassert>
+#include <string.h>
+
+template <size_t N> class Argv
+{
+public:
+ typedef char *pchar_t;
+
+public:
+ ~Argv()
+ {
+ for (size_t n = 0; n < _ptr; ++n)
+      delete[] _argv[n];
+ }
+
+ void add(const char *in)
+ {
+ assert(_ptr < N);
+ _argv[_ptr] = new char[strlen(in) + 1];
+ strncpy(_argv[_ptr], in, strlen(in) + 1);
+ _ptr++;
+ }
+
+ pchar_t *argv(void) { return _argv; }
+
+private:
+ pchar_t _argv[N] = {
+ nullptr,
+ };
+ size_t _ptr = 0;
+};
+
+#endif // __CIRCLE_OPSELECTOR_TEST_HELPER_H__
diff --git a/compiler/circle-part-value-test/CMakeLists.txt b/compiler/circle-part-value-test/CMakeLists.txt
index b4b1b19db..1cfbcbd9b 100644
--- a/compiler/circle-part-value-test/CMakeLists.txt
+++ b/compiler/circle-part-value-test/CMakeLists.txt
@@ -106,7 +106,7 @@ add_dependencies(circle_part_value_test_prepare common_artifacts_deps)
add_test(NAME circle_part_value_test
COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/part_eval_all.sh"
"${CMAKE_CURRENT_BINARY_DIR}"
- "${NNCC_OVERLAY_DIR}/venv_2_3_0"
+ "${NNCC_OVERLAY_DIR}/venv_2_6_0"
"$<TARGET_FILE:circle_part_driver>"
${PARTITION_LIST}
)
diff --git a/compiler/circle-partitioner/README.md b/compiler/circle-partitioner/README.md
index e1a0258dc..5fd312e33 100644
--- a/compiler/circle-partitioner/README.md
+++ b/compiler/circle-partitioner/README.md
@@ -49,8 +49,8 @@ DIV=acl_cl
- `backends`: Existing partition group names which nodes should be placed, in CSV format.
- `default`: Default group name which should be one of `backends` item.
- `comply`: How to group nodes of the model.
- - currently `opcode` is supported
- - future work: set group by node name or sequence number.
+ - currently `opcode` and `opname` are supported
+ - future work: set group by sequence number.
##### `[OPCODE]` section
diff --git a/compiler/circle-quantizer/src/CircleQuantizer.cpp b/compiler/circle-quantizer/src/CircleQuantizer.cpp
index 5e717d085..1a09a8a2a 100644
--- a/compiler/circle-quantizer/src/CircleQuantizer.cpp
+++ b/compiler/circle-quantizer/src/CircleQuantizer.cpp
@@ -43,6 +43,7 @@ void print_exclusive_options(void)
std::cout << " --quantize_dequantize_weights" << std::endl;
std::cout << " --quantize_with_minmax" << std::endl;
std::cout << " --requantize" << std::endl;
+ std::cout << " --force_quantparam" << std::endl;
}
void print_version(void)
@@ -63,6 +64,7 @@ int entry(int argc, char **argv)
const std::string qdqw = "--quantize_dequantize_weights";
const std::string qwmm = "--quantize_with_minmax";
const std::string rq = "--requantize";
+ const std::string fq = "--force_quantparam";
const std::string gpd = "--generate_profile_data";
@@ -105,6 +107,15 @@ int entry(int argc, char **argv)
"Two arguments required: input_dtype(int8) "
"output_dtype(uint8)");
+ arser.add_argument(fq)
+ .nargs(3)
+ .type(arser::DataType::STR_VEC)
+ .required(false)
+ .accumulated(true)
+ .help("Write quantization parameters to the specified tensor. "
+ "Three arguments required: tensor_name(string), "
+ "scale(float) zero_point(int)");
+
arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
@@ -123,10 +134,11 @@ int entry(int argc, char **argv)
}
{
- // only one of qdqw, qwmm, rq option can be used
+ // only one of qdqw, qwmm, rq, fq option can be used
int32_t opt_used = arser[qdqw] ? 1 : 0;
opt_used += arser[qwmm] ? 1 : 0;
opt_used += arser[rq] ? 1 : 0;
+ opt_used += arser[fq] ? 1 : 0;
if (opt_used != 1)
{
print_exclusive_options();
@@ -185,6 +197,34 @@ int entry(int argc, char **argv)
options->param(AlgorithmParameters::Quantize_output_dtype, values.at(1));
}
+ if (arser[fq])
+ {
+ auto values = arser.get<std::vector<std::vector<std::string>>>(fq);
+
+ std::vector<std::string> tensors;
+ std::vector<std::string> scales;
+ std::vector<std::string> zero_points;
+
+ for (auto const value : values)
+ {
+ if (value.size() != 3)
+ {
+ std::cerr << arser;
+ return 255;
+ }
+
+ tensors.push_back(value[0]);
+ scales.push_back(value[1]);
+ zero_points.push_back(value[2]);
+ }
+
+ options->enable(Algorithms::ForceQuantParam);
+
+ options->params(AlgorithmParameters::Quantize_tensor_names, tensors);
+ options->params(AlgorithmParameters::Quantize_scales, scales);
+ options->params(AlgorithmParameters::Quantize_zero_points, zero_points);
+ }
+
std::string input_path = arser.get<std::string>("input");
std::string output_path = arser.get<std::string>("output");
diff --git a/compiler/circle2circle-dredd-recipe-test/test.lst b/compiler/circle2circle-dredd-recipe-test/test.lst
index 95822c758..f41aac303 100644
--- a/compiler/circle2circle-dredd-recipe-test/test.lst
+++ b/compiler/circle2circle-dredd-recipe-test/test.lst
@@ -41,6 +41,7 @@ Add(Net_Maximum_Minimum_000 PASS transform_min_max_to_relu6)
Add(BatchMatMulV2_000 PASS resolve_customop_batchmatmul)
Add(MatMul_000 PASS resolve_customop_matmul)
Add(DepthwiseConv2D_003 PASS)
+Add(PadV2_001 PASS substitute_padv2_to_pad)
Add(StridedSlice_003 PASS substitute_strided_slice_to_reshape)
Add(MaxPoolWithArgmax_000 PASS resolve_customop_max_pool_with_argmax)
Add(MaxPoolWithArgmax_001 PASS resolve_customop_max_pool_with_argmax)
diff --git a/compiler/circle2circle/src/Circle2Circle.cpp b/compiler/circle2circle/src/Circle2Circle.cpp
index 1998b1646..a5ddb26dc 100644
--- a/compiler/circle2circle/src/Circle2Circle.cpp
+++ b/compiler/circle2circle/src/Circle2Circle.cpp
@@ -98,6 +98,12 @@ int entry(int argc, char **argv)
.default_value(false)
.help("This will fold dequantize op");
+ arser.add_argument("--fold_dwconv")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fold Depthwise Convolution operator with constant inputs");
+
arser.add_argument("--fold_sparse_to_dense")
.nargs(0)
.required(false)
@@ -116,6 +122,12 @@ int entry(int argc, char **argv)
.default_value(false)
.help("This will fuse Activation function to a preceding operator");
+ arser.add_argument("--fuse_add_with_fully_connected")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will fuse Add operator to FullyConnected operator");
+
arser.add_argument("--fuse_add_with_tconv")
.nargs(0)
.required(false)
@@ -282,6 +294,12 @@ int entry(int argc, char **argv)
.default_value(false)
.help("This will convert certain condition PadV2 to Pad");
+ arser.add_argument("--substitute_splitv_to_split")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will convert certain condition SplitV to Split operator");
+
arser.add_argument("--substitute_squeeze_to_reshape")
.nargs(0)
.required(false)
@@ -300,6 +318,12 @@ int entry(int argc, char **argv)
.default_value(false)
.help("This will convert single input Transpose to Reshape");
+ arser.add_argument("--expand_broadcast_const")
+ .nargs(0)
+ .required(false)
+ .default_value(false)
+ .help("This will expand broadcastable constant inputs");
+
arser.add_argument("--convert_nchw_to_nhwc")
.nargs(0)
.required(false)
@@ -426,6 +450,8 @@ int entry(int argc, char **argv)
options->enable(Algorithms::FoldCast);
if (arser.get<bool>("--fold_dequantize"))
options->enable(Algorithms::FoldDequantize);
+ if (arser.get<bool>("--fold_dwconv"))
+ options->enable(Algorithms::FoldDepthwiseConv2D);
if (arser.get<bool>("--fold_sparse_to_dense"))
options->enable(Algorithms::FoldSparseToDense);
if (arser.get<bool>("--forward_reshape_to_unaryop"))
@@ -434,6 +460,8 @@ int entry(int argc, char **argv)
options->enable(Algorithms::FuseActivationFunction);
if (arser.get<bool>("--fuse_batchnorm_with_conv"))
options->enable(Algorithms::FuseBatchNormWithConv);
+ if (arser.get<bool>("--fuse_add_with_fully_connected"))
+ options->enable(Algorithms::FuseAddWithFullyConnected);
if (arser.get<bool>("--fuse_add_with_tconv"))
options->enable(Algorithms::FuseAddWithTConv);
if (arser.get<bool>("--fuse_batchnorm_with_dwconv"))
@@ -486,6 +514,8 @@ int entry(int argc, char **argv)
options->enable(Algorithms::SubstitutePackToReshape);
if (arser.get<bool>("--substitute_padv2_to_pad"))
options->enable(Algorithms::SubstitutePadV2ToPad);
+ if (arser.get<bool>("--substitute_splitv_to_split"))
+ options->enable(Algorithms::SubstituteSplitVToSplit);
if (arser.get<bool>("--substitute_squeeze_to_reshape"))
options->enable(Algorithms::SubstituteSqueezeToReshape);
if (arser.get<bool>("--substitute_strided_slice_to_reshape"))
@@ -496,6 +526,8 @@ int entry(int argc, char **argv)
options->enable(Algorithms::TransformMinMaxToRelu6Pass);
if (arser.get<bool>("--transform_min_relu_to_relu6"))
options->enable(Algorithms::TransformMinReluToRelu6Pass);
+ if (arser.get<bool>("--expand_broadcast_const"))
+ options->enable(Algorithms::ExpandBroadcastConst);
if (arser.get<bool>("--mute_warnings"))
settings->set(luci::UserSettings::Key::MuteWarnings, true);
diff --git a/compiler/circledump/CMakeLists.txt b/compiler/circledump/CMakeLists.txt
index 8ef68370d..7848ac722 100644
--- a/compiler/circledump/CMakeLists.txt
+++ b/compiler/circledump/CMakeLists.txt
@@ -11,6 +11,6 @@ target_include_directories(circledump PRIVATE include)
target_link_libraries(circledump arser)
target_link_libraries(circledump mio_circle)
target_link_libraries(circledump safemain)
-target_link_libraries(circledump flatbuffers)
+target_link_libraries(circledump flatbuffers-1.10)
install(TARGETS circledump DESTINATION bin)
diff --git a/compiler/common-artifacts/CMakeLists.txt b/compiler/common-artifacts/CMakeLists.txt
index edca29b34..6de634a25 100644
--- a/compiler/common-artifacts/CMakeLists.txt
+++ b/compiler/common-artifacts/CMakeLists.txt
@@ -17,6 +17,8 @@ set(VIRTUALENV_OVERLAY_TF_1_13_2 "${NNCC_OVERLAY_DIR}/venv_1_13_2")
# Create python virtual environment with tensorflow 2.3.0
set(VIRTUALENV_OVERLAY_TF_2_3_0 "${NNCC_OVERLAY_DIR}/venv_2_3_0")
+# Create python virtual environment with tensorflow 2.6.0
+set(VIRTUALENV_OVERLAY_TF_2_6_0 "${NNCC_OVERLAY_DIR}/venv_2_6_0")
add_custom_command(
OUTPUT ${VIRTUALENV_OVERLAY_TF_1_13_2}
@@ -27,11 +29,16 @@ add_custom_command(
OUTPUT ${VIRTUALENV_OVERLAY_TF_2_3_0}
COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_3_0}
)
+add_custom_command(
+ OUTPUT ${VIRTUALENV_OVERLAY_TF_2_6_0}
+ COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_6_0}
+)
# Create requirements.txt and install required pip packages
set(REQUIREMENTS_FILE "requirements.txt")
set(REQUIREMENTS_OVERLAY_PATH_TF_1_13_2 "${VIRTUALENV_OVERLAY_TF_1_13_2}/${REQUIREMENTS_FILE}")
set(REQUIREMENTS_OVERLAY_PATH_TF_2_3_0 "${VIRTUALENV_OVERLAY_TF_2_3_0}/${REQUIREMENTS_FILE}")
+set(REQUIREMENTS_OVERLAY_PATH_TF_2_6_0 "${VIRTUALENV_OVERLAY_TF_2_6_0}/${REQUIREMENTS_FILE}")
# TODO remove version number of '--upgrade pip==20.2.1 setuptools==49.3.0'
# NOTE adding version is for temporary hotfix of setuptools 50.x.y version
@@ -53,8 +60,23 @@ add_custom_command(
DEPENDS ${VIRTUALENV_OVERLAY_TF_2_3_0}
)
+add_custom_command(
+ OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
+ COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
+ COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.6.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
+ COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
+ COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 setuptools==49.3.0
+ COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} --upgrade
+ DEPENDS ${VIRTUALENV_OVERLAY_TF_2_6_0}
+)
+
add_custom_target(common_artifacts_python_deps ALL
- DEPENDS ${VIRTUALENV_OVERLAY_TF_1_13_2} ${VIRTUALENV_OVERLAY_TF_2_3_0} ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2} ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
+ DEPENDS ${VIRTUALENV_OVERLAY_TF_1_13_2}
+ ${VIRTUALENV_OVERLAY_TF_2_3_0}
+ ${VIRTUALENV_OVERLAY_TF_2_6_0}
+ ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2}
+ ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
+ ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
)
#[[ Generate common resources ]]
diff --git a/compiler/enco/frontend/tflite/CMakeLists.txt b/compiler/enco/frontend/tflite/CMakeLists.txt
index ea10fbc4b..b2de2b34b 100644
--- a/compiler/enco/frontend/tflite/CMakeLists.txt
+++ b/compiler/enco/frontend/tflite/CMakeLists.txt
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers QUIET)
+nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
if(NOT FlatBuffers_FOUND)
return()
@@ -17,7 +17,7 @@ add_library(enco_tflite_frontend SHARED ${SOURCES})
target_include_directories(enco_tflite_frontend PRIVATE src)
target_link_libraries(enco_tflite_frontend enco_intf_frontend)
target_link_libraries(enco_tflite_frontend enco_intf_cmdline)
-target_link_libraries(enco_tflite_frontend flatbuffers)
+target_link_libraries(enco_tflite_frontend flatbuffers-1.10)
target_link_libraries(enco_tflite_frontend enco_tflite_schema)
target_link_libraries(enco_tflite_frontend morph)
target_link_libraries(enco_tflite_frontend cwrap)
diff --git a/compiler/exo/CMakeLists.txt b/compiler/exo/CMakeLists.txt
index e686cbb83..9d02f7cba 100644
--- a/compiler/exo/CMakeLists.txt
+++ b/compiler/exo/CMakeLists.txt
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers QUIET)
+nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
if(NOT FlatBuffers_FOUND)
message(STATUS "Build exo: FALSE (missing FlatBuffers)")
diff --git a/compiler/luci-interpreter/CMakeLists.txt b/compiler/luci-interpreter/CMakeLists.txt
index ab4ec1f43..1f7acee87 100644
--- a/compiler/luci-interpreter/CMakeLists.txt
+++ b/compiler/luci-interpreter/CMakeLists.txt
@@ -4,4 +4,12 @@ if (NOT LUCI_INTERPRETER_PAL_DIR)
set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/pal/linux")
endif()
+set(KERNEL_REGISTER_FILE ${LUCI_INTERPRETER_PAL_DIR}/KernelsToBuild.lst)
+
+if (NOT DEFINED CUSTOM_LUCI_INTERPRETER_SUFFIX)
+ set(LUCI_INTERPRETER_SUFFIX "")
+else()
+ set(LUCI_INTERPRETER_SUFFIX ${CUSTOM_LUCI_INTERPRETER_SUFFIX})
+endif()
+
add_subdirectory(src)
diff --git a/compiler/luci-interpreter/README.md b/compiler/luci-interpreter/README.md
new file mode 100644
index 000000000..4a9a34e6d
--- /dev/null
+++ b/compiler/luci-interpreter/README.md
@@ -0,0 +1,158 @@
+# luci-interpreter
+
+`luci-interpreter` is an inference engine for neural networks represented in luci IR.
+See the `compiler/luci/lang` directory for details about the IR.
+You can find useful infrastructure, like the importer/exporter and optimizations, in `compiler/luci`.
+
+`luci-interpreter` provides:
+- Basic inference functionality, input setters and output getters
+- Interface for inspecting hidden interpreter state, like activation values during inference
+- Customization mechanisms to fit the interpreter to specific platforms, like MCUs
+
+Public interface headers are placed in the `luci-interpreter/include/luci_interpreter` directory.
+
+## Basic usage
+
+Minimal usage includes:
+- Setting input data
+- Running inference
+- Fetching inference results
+
+The Interpreter object is reusable and can run multiple inferences.
+Elements in tensors (input/output/internal) are stored contiguously and have a C-like (row-major) layout:
+this means that for tensor t = [[0, 1], [2, 3]], t[0, 1] == 1.
+
+Input and output tensors have the same indexes as in the original luci model.
+
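+As a rough illustration of that layout (a hypothetical flattened 2x2 float array, not part of the interpreter API):
+``` c++
+// Row-major ("C-like") layout: element (row, col) is stored at offset row * num_cols + col.
+const int num_cols = 2;
+float t[2 * 2] = {0, 1, 2, 3}; // t = [[0, 1], [2, 3]]
+float t_0_1 = t[0 * num_cols + 1]; // == 1
+```
+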
+**Usage example:**
+``` c++
+// Note: getTensorSize is a function that computes the tensor size;
+// it is not part of the interpreter and should be implemented by the user
+
+luci_interpreter::Interpreter interpreter(module);
+
+// Set inputs
+// assuming model has only one input and one output
+const auto input_nodes = loco::input_nodes(module->graph());
+
+const auto *input_node = dynamic_cast<const luci::CircleInput *>(input_nodes[0]);
+std::vector<char> input_data(getTensorSize(input_node));
+// Initialize input data here
+
+interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+
+// Start inference
+interpreter.interpret();
+
+// Fetch inference results
+const auto output_nodes = loco::output_nodes(module->graph());
+const auto *output_node = dynamic_cast<const luci::CircleOutput *>(output_nodes[0]);
+std::vector<char> output_data(getTensorSize(output_node));
+interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
+```
+
+## Inspecting intermediate state
+
+The interpreter provides interfaces to inspect its internal state during inference.
+
+This is done by an "observer" mechanism:
+- the `Interpreter` class has an `attachObserver` method, which takes a pointer to an `ExecutionObserver` object
+- `ExecutionObserver` defines several callback methods the user can override to inject custom code
+
+`ExecutionObserver` provides three callbacks:
+- `postTensorWrite` lets the user inspect the contents of an output tensor after an operation has executed
+- `preOperatorExecute` notifies that the interpreter is about to execute an operation
+- `postOperatorExecute` notifies that the interpreter has finished executing an operation
+
+See `luci-interpreter/include/luci_interpreter/Interpreter.h` for details of this interface.
+
+**Usage example:**
+``` c++
+class CustomExecutionObserver: public luci_interpreter::ExecutionObserver
+{
+public:
+ void postTensorWrite(const luci::CircleNode *node, const Tensor *tensor) override
+ {
+ if (tensor->element_type() != loco::DataType::FLOAT32)
+ return;
+ for (int i = 0; i < tensor->shape().num_elements(); ++i)
+      std::cout << tensor->data<float>()[i] << ", ";
+ }
+
+  // A user observer can override only the methods it needs;
+  // the others inherit the empty implementation from the base observer.
+
+ // void preOperatorExecute(const luci::CircleNode *node);
+ // void postOperatorExecute(const luci::CircleNode *node);
+};
+
+luci_interpreter::Interpreter interpreter(module);
+CustomExecutionObserver observer;
+interpreter.attachObserver(&observer);
+
+// initialize input_data
+interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+
+interpreter.interpret();
+```
+
+## Customizing inference
+
+### Memory manager
+
+The interpreter provides a handle for altering the default memory management mechanism.
+
+This is done by the `MemoryManager` interface; see `luci-interpreter/include/luci_interpreter/MemoryManager.h` for implementation details.
+
+This header contains the `IMemoryManager` abstract class, which is responsible for allocation and deallocation of tensors' memory.
+
+The user can construct an interpreter with one of the predefined memory managers or with a custom memory manager.
+Note that one memory manager can be shared between multiple interpreter instances, because an interpreter does not own the manager object.
+
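+A minimal sketch of such sharing, assuming two already-imported modules `module_a` and `module_b` (hypothetical names):
+``` c++
+luci_interpreter::SimpleMemoryManager mm;
+// Both interpreters use the same manager; the manager must outlive them.
+luci_interpreter::Interpreter interpreter_a(module_a, &mm);
+luci_interpreter::Interpreter interpreter_b(module_b, &mm);
+```
+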
+List of predefined memory managers:
+- `SimpleMemoryManager` A simple wrapper around new/delete; this is the default.
+- `TestMemoryManager` Remembers all allocated memory and releases it in the manager's destructor; used in kernel unit tests.
+- `BuddyMemoryManager` Implements the buddy algorithm; uses an external buffer for tensor data allocations, so it does not need new/delete.
+- `StaticMemoryManager` Uses a precomputed memory allocation plan. Requires preparation with a MemoryPlanner, but can reduce memory consumption in restricted environments (like MCUs).
+
+**SimpleMemoryManager usage example:**
+
+There is no need to select anything to use this memory manager; it is the default.
+``` c++
+luci_interpreter::Interpreter interpreter(module);
+```
+
+**TestMemoryManager usage example:**
+
+``` c++
+luci_interpreter::TestMemoryManager mm;
+luci_interpreter::Interpreter interpreter(module, &mm);
+```
+
+**BuddyMemoryManager usage example:**
+
+`BuddyMemoryManager` implements a classic allocation algorithm: https://en.wikipedia.org/wiki/Buddy_memory_allocation.
+
+This allocator uses an external buffer as a memory pool, which allows static memory arrays to be used for allocations.
+
+Limitations:
+- The current implementation uses only the lower power-of-two portion of the given buffer.
+
+  For example, for a 1000-byte buffer, only the lower 512 bytes will be used.
+- The current implementation can handle a memory pool of at most 4 gigabytes.
+
+``` c++
+ constexpr int buffer_size = 2048;
+ static uint8_t buffer[buffer_size];
+ luci_interpreter::BuddyMemoryManager memory_manager(buffer, buffer_size);
+ luci_interpreter::Interpreter interpreter(module.get(), &memory_manager);
+```
+
+**StaticMemoryManager usage example:**
+``` c++
+TBD when it is merged
+```
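+
+Until that example is merged, here is a minimal sketch of what usage might look like, based only on the constructor shown in `StaticMemoryManager.h` (the buffer size and its contents are assumptions; a real setup needs an allocation plan prepared for the model):
+``` c++
+// Buffer assumed to be large enough for the precomputed allocation plan.
+static uint8_t buffer[1024];
+luci_interpreter::StaticMemoryManager memory_manager(buffer);
+luci_interpreter::Interpreter interpreter(module.get(), &memory_manager);
+```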
+
+## Further reading
+
+If you want to participate in development, please read `DEVELOPER.md` for SW architecture details.
diff --git a/compiler/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h b/compiler/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h
new file mode 100644
index 000000000..205baa626
--- /dev/null
+++ b/compiler/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h
@@ -0,0 +1,144 @@
+/* Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/MemoryManager.h"
+
+#ifndef LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
+
+namespace luci_interpreter
+{
+
+class BuddyMemoryManager : public IMemoryManager
+{
+public:
+ BuddyMemoryManager(uint8_t *memory_start, int32_t memSize);
+
+ void allocate_memory(luci_interpreter::Tensor &tensor) final;
+ void release_memory(luci_interpreter::Tensor &tensor) final;
+
+private:
+ struct Block
+ {
+ Block *next_free;
+ bool is_free;
+ uint32_t size;
+ // debug field
+ Block *self;
+ };
+
+ Block *_start_block;
+ int32_t _num_blocks;
+ uint32_t _size;
+ Block *_free_blocks[32]{};
+
+ static int32_t lowerLog2(uint32_t val)
+ {
+ int32_t i = 0;
+ while (val >>= 1)
+ i++;
+
+ return i;
+ }
+
+ void addToBlocks(Block *block, int32_t l)
+ {
+ if (!block)
+ return;
+
+ block->next_free = _free_blocks[l];
+ _free_blocks[l] = block;
+ }
+
+ void removeFromBlocks(const Block *block, int32_t l)
+ {
+ if (!block)
+ return;
+
+ Block *tmp = _free_blocks[l];
+
+ if (block == tmp)
+ {
+ _free_blocks[l] = block->next_free;
+ return;
+ }
+
+ while (tmp)
+ {
+ if (tmp->next_free == block)
+ {
+ tmp->next_free = block->next_free;
+ return;
+ }
+
+ tmp = tmp->next_free;
+ }
+ }
+
+ void divideBlock(Block *block, int32_t l)
+ {
+ int32_t size = ((block->size + sizeof(Block)) / 2) - sizeof(Block);
+
+ removeFromBlocks(block, l);
+
+ // there is no need to add to the free_blocks list here
+ block->is_free = true;
+ block->size = size;
+ block->self = block;
+
+ Block *buddy;
+ buddy = (Block *)((uint8_t *)block + sizeof(Block) + size);
+ buddy->is_free = true;
+ buddy->size = size;
+ buddy->self = buddy;
+
+ addToBlocks(buddy, l - 1);
+ }
+
+ Block *mergeBlock(Block *block)
+ {
+ Block *buddy;
+
+ const int32_t l = lowerLog2(block->size + sizeof(Block));
+
+ const int64_t address = ((uint8_t *)block - (uint8_t *)_start_block);
+ buddy = (Block *)((address ^ (1 << l)) + (uint8_t *)_start_block);
+
+ if (!buddy->is_free || buddy->size != block->size)
+ return nullptr;
+
+ if (block > buddy)
+ {
+ Block *x = block;
+ block = buddy;
+ buddy = x;
+ }
+
+ removeFromBlocks(block, l);
+ removeFromBlocks(buddy, l);
+
+ block->size = block->size * 2 + sizeof(Block);
+ block->is_free = true;
+ block->self = block;
+
+ addToBlocks(block, l + 1);
+
+ return block;
+ }
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
diff --git a/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h b/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h
index 7a14bf6f8..7dee8a7f2 100644
--- a/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h
+++ b/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h
@@ -22,6 +22,7 @@
#include <luci/IR/Nodes/CircleInput.h>
#include <luci/IR/Nodes/CircleOutput.h>
+#include "luci_interpreter/MemoryManager.h"
#include <luci/IR/Module.h>
#include <memory>
@@ -49,7 +50,7 @@ public:
class Interpreter
{
public:
- explicit Interpreter(const luci::Module *module);
+ explicit Interpreter(const luci::Module *module, IMemoryManager *memory_manager = nullptr);
~Interpreter();
@@ -64,7 +65,11 @@ public:
const Tensor *getTensor(const loco::Node *node) { return _node_to_tensor[node]; }
private:
+ // _default_memory_manager should be before _runtime_module due to
+ // the order of deletion in the destructor
+ std::unique_ptr<IMemoryManager> _default_memory_manager = nullptr;
std::unique_ptr<class RuntimeModule> _runtime_module;
+ IMemoryManager *_memory_manager = nullptr;
// Observer functionality support.
std::unique_ptr<struct RuntimeToIR> _runtime_to_ir;
diff --git a/compiler/luci-interpreter/include/luci_interpreter/MemoryManager.h b/compiler/luci-interpreter/include/luci_interpreter/MemoryManager.h
new file mode 100644
index 000000000..f32c52095
--- /dev/null
+++ b/compiler/luci-interpreter/include/luci_interpreter/MemoryManager.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_MEMORY_MANAGER_H
+
+#include "luci_interpreter/core/DataType.h"
+#include "luci_interpreter/core/Tensor.h"
+
+namespace luci_interpreter
+{
+
+class IMemoryManager
+{
+public:
+ virtual void allocate_memory(luci_interpreter::Tensor &tensor) = 0;
+ virtual void release_memory(luci_interpreter::Tensor &tensor) = 0;
+
+ virtual ~IMemoryManager() = default;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_MEMORY_MANAGER_H
diff --git a/compiler/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h b/compiler/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h
new file mode 100644
index 000000000..658a1c609
--- /dev/null
+++ b/compiler/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
+
+#include "luci_interpreter/MemoryManager.h"
+
+namespace luci_interpreter
+{
+
+class SimpleMemoryManager : public IMemoryManager
+{
+public:
+ void allocate_memory(luci_interpreter::Tensor &tensor) final;
+ void release_memory(luci_interpreter::Tensor &tensor) final;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
diff --git a/compiler/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h b/compiler/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h
new file mode 100644
index 000000000..ded7bde79
--- /dev/null
+++ b/compiler/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
+
+#include "luci_interpreter/MemoryManager.h"
+
+namespace luci_interpreter
+{
+
+// Used for allocations in a static buffer, using offsets defined in the luci model.
+class StaticMemoryManager : public IMemoryManager
+{
+public:
+ StaticMemoryManager() = delete;
+
+ explicit StaticMemoryManager(uint8_t *buffer_ptr) : _buffer_ptr(buffer_ptr)
+ { /* Do nothing */
+ }
+
+ void allocate_memory(luci_interpreter::Tensor &tensor) final;
+ void release_memory(luci_interpreter::Tensor &tensor) final;
+
+private:
+ // Stores a pointer to the beginning of the allocated memory buffer.
+ uint8_t *_buffer_ptr;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
diff --git a/compiler/luci-interpreter/include/luci_interpreter/TestMemoryManager.h b/compiler/luci-interpreter/include/luci_interpreter/TestMemoryManager.h
new file mode 100644
index 000000000..397bbed76
--- /dev/null
+++ b/compiler/luci-interpreter/include/luci_interpreter/TestMemoryManager.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
+
+#include "luci_interpreter/MemoryManager.h"
+
+namespace luci_interpreter
+{
+// Memory manager for use in kernel tests. It eliminates the need to manually delete the
+// allocated memory in tests: the manager remembers all its allocations and deletes them in its
+// destructor.
+class TestMemoryManager : public IMemoryManager
+{
+public:
+ void allocate_memory(luci_interpreter::Tensor &tensor) final;
+ void release_memory(luci_interpreter::Tensor &tensor) final;
+
+ ~TestMemoryManager() override
+ {
+ for (auto allocation : allocations)
+ {
+ delete[] allocation;
+ }
+ }
+
+private:
+ std::vector<uint8_t *> allocations;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
diff --git a/compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h b/compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h
index e356bce92..bb9ff6d4a 100644
--- a/compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h
+++ b/compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h
@@ -107,9 +107,6 @@ public:
return _quantization.zero_point[0];
}
- void allocate();
- void deallocate();
-
const std::vector<float> &scales() const { return _quantization.scale; }
const std::vector<int32_t> &zero_points() const { return _quantization.zero_point; }
@@ -118,15 +115,16 @@ public:
template <typename T> const T *data() const
{
- assert(_data_allocated);
- return reinterpret_cast<const T *>(_data.get());
+ static_assert(std::is_same<uint8_t, char>::value or
+ std::is_same<uint8_t, unsigned char>::value);
+ return reinterpret_cast<const T *>(_data);
}
template <typename T> T *data()
{
- if (!_data_allocated)
- allocate();
- return reinterpret_cast<T *>(_data.get());
+ static_assert(std::is_same<uint8_t, char>::value or
+ std::is_same<uint8_t, unsigned char>::value);
+ return reinterpret_cast<T *>(_data);
}
const std::string &name() const { return _name; }
@@ -137,13 +135,50 @@ public:
void resize(const Shape &new_shape);
+ void set_data_buffer(uint8_t *buffer)
+ {
+ if (buffer == nullptr)
+ {
+ _data_allocated = false;
+ }
+ else
+ {
+ _data_allocated = true;
+ }
+ _data = buffer;
+ }
+
+ bool is_observable() const { return _is_observable; }
+
+ void set_observable(bool value) { _is_observable = value; }
+
+ bool is_allocatable() const { return _is_allocatable; }
+
+ void set_allocatable(bool value) { _is_allocatable = value; }
+
+ bool is_data_allocated() const { return _data_allocated; }
+
+ int32_t get_offset() const { return _offset; }
+
+ void set_offset(int32_t offset) { _offset = offset; }
+
private:
DataType _element_type;
Shape _shape;
AffineQuantization _quantization;
- std::unique_ptr<uint8_t[]> _data;
+ uint8_t *_data;
std::string _name;
bool _data_allocated;
+  // A write to this tensor is reported to registered Observers only if the tensor is observable.
+  // This is needed for tensors used in kernel implementations but not present in the original model.
+ bool _is_observable = true;
+  // The memory manager is called for a tensor only if it is "allocatable".
+  // Kernel configuration may disable allocation of tensors that are not needed for a
+  // particular operation.
+ bool _is_allocatable = true;
+ // Used by static memory manager.
+ // Stores the offset from the beginning of the allocated memory buffer.
+ int32_t _offset = -1;
};
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst b/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst
new file mode 100644
index 000000000..9d541276c
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst
@@ -0,0 +1,68 @@
+REGISTER_KERNEL(Add)
+REGISTER_KERNEL(ArgMax)
+REGISTER_KERNEL(AveragePool2D)
+REGISTER_KERNEL(BatchToSpaceND)
+REGISTER_KERNEL(Cast)
+REGISTER_KERNEL(Concatenation)
+REGISTER_KERNEL(Conv2D)
+REGISTER_KERNEL(DepthToSpace)
+REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Div)
+REGISTER_KERNEL(Elu)
+REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(Floor)
+REGISTER_KERNEL(FloorDiv)
+REGISTER_KERNEL(Equal)
+REGISTER_KERNEL(FullyConnected)
+REGISTER_KERNEL(Greater)
+REGISTER_KERNEL(GreaterEqual)
+REGISTER_KERNEL(If)
+REGISTER_KERNEL(InstanceNorm)
+REGISTER_KERNEL(L2Normalize)
+REGISTER_KERNEL(L2Pool2D)
+REGISTER_KERNEL(LeakyRelu)
+REGISTER_KERNEL(Less)
+REGISTER_KERNEL(LessEqual)
+REGISTER_KERNEL(LocalResponseNormalization)
+REGISTER_KERNEL(LogicalAnd)
+REGISTER_KERNEL(LogicalNot)
+REGISTER_KERNEL(LogicalOr)
+REGISTER_KERNEL(Logistic)
+REGISTER_KERNEL(LogSoftmax)
+REGISTER_KERNEL(Maximum)
+REGISTER_KERNEL(MaxPool2D)
+REGISTER_KERNEL(Mean)
+REGISTER_KERNEL(Minimum)
+REGISTER_KERNEL(MirrorPad)
+REGISTER_KERNEL(Mul)
+REGISTER_KERNEL(Neg)
+REGISTER_KERNEL(NotEqual)
+REGISTER_KERNEL(Pack)
+REGISTER_KERNEL(Pad)
+REGISTER_KERNEL(PadV2)
+REGISTER_KERNEL(Pow)
+REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Relu)
+REGISTER_KERNEL(Relu6)
+REGISTER_KERNEL(Reshape)
+REGISTER_KERNEL(ResizeBilinear)
+REGISTER_KERNEL(ResizeNearestNeighbor)
+REGISTER_KERNEL(ReverseV2)
+REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Slice)
+REGISTER_KERNEL(Softmax)
+REGISTER_KERNEL(SpaceToBatchND)
+REGISTER_KERNEL(SpaceToDepth)
+REGISTER_KERNEL(Split)
+REGISTER_KERNEL(SplitV)
+REGISTER_KERNEL(StridedSlice)
+REGISTER_KERNEL(Sqrt)
+REGISTER_KERNEL(Square)
+REGISTER_KERNEL(SquaredDifference)
+REGISTER_KERNEL(Squeeze)
+REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(Tanh)
+REGISTER_KERNEL(Transpose)
+REGISTER_KERNEL(TransposeConv)
+REGISTER_KERNEL(Unpack)
+REGISTER_KERNEL(While)
diff --git a/compiler/luci-interpreter/pal/linux/PALArgMax.h b/compiler/luci-interpreter/pal/linux/PALArgMax.h
new file mode 100644
index 000000000..21e63296d
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALArgMax.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H
+#define LUCI_INTERPRETER_PAL_ARGMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/arg_min_max.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T1, typename T2, typename T3>
+static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data,
+ const T2 *axis, const tflite::RuntimeShape &output_shape,
+ T3 *output_data, const std::greater<T1> cmp)
+{
+ tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ARGMAX_H
diff --git a/compiler/luci-interpreter/pal/linux/PALBatchToSpaceND.h b/compiler/luci-interpreter/pal/linux/PALBatchToSpaceND.h
new file mode 100644
index 000000000..3fe2022ed
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALBatchToSpaceND.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::BatchToSpaceND(
+ unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, crops_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
diff --git a/compiler/luci-interpreter/pal/linux/PALConv2d.h b/compiler/luci-interpreter/pal/linux/PALConv2d.h
new file mode 100644
index 000000000..2550dd5d7
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALConv2d.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+
+#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &filter_shape,
+ const float *filter_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, const tflite::RuntimeShape &output_shape,
+ float *output_data, const tflite::RuntimeShape &im2col_shape,
+ float *im2col_data)
+{
+ if (im2col_data)
+ {
+ tflite::optimized_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, im2col_shape,
+ im2col_data);
+ }
+ else
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data,
+ tflite::RuntimeShape(), nullptr);
+}
+
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ uint8 *output_data, const tflite::RuntimeShape &im2col_shape,
+ uint8 *im2col_data)
+{
+ // TODO This should only be done once (although it takes only a few microseconds).
+ // Also, the user should be able to adjust the number of threads.
+ auto gemmlowp_context = std::make_unique<gemmlowp::GemmContext>();
+ gemmlowp_context->set_max_num_threads(static_cast<int>(std::thread::hardware_concurrency()));
+
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, im2col_shape,
+ im2col_data, gemmlowp_context.get());
+}
+
+static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
+ const int32_t *shifts, const tflite::RuntimeShape &input_shape,
+ const int8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ int8 *output_data, const tflite::RuntimeShape &im2col_shape,
+ int8 *im2col_data)
+{
+ (void)im2col_shape;
+ (void)im2col_data;
+ // TODO enable optimized version
+ tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/compiler/luci-interpreter/pal/linux/PALDepthToSpace.h b/compiler/luci-interpreter/pal/linux/PALDepthToSpace.h
new file mode 100644
index 000000000..f9ebfcfb5
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALDepthToSpace.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::DepthToSpace(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
diff --git a/compiler/luci-interpreter/pal/linux/PALElu.h b/compiler/luci-interpreter/pal/linux/PALElu.h
new file mode 100644
index 000000000..cb365ffd0
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALElu.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ELU_H
+#define LUCI_INTERPRETER_PAL_ELU_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::Elu(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ELU_H
diff --git a/compiler/luci-interpreter/pal/linux/PALL2Normalize.h b/compiler/luci-interpreter/pal/linux/PALL2Normalize.h
new file mode 100644
index 000000000..6c663e21f
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALL2Normalize.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Normalization(const tflite::L2NormalizationParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::L2Normalization(op_params, input_shape, input_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H
diff --git a/compiler/luci-interpreter/pal/linux/PALL2Pool2D.h b/compiler/luci-interpreter/pal/linux/PALL2Pool2D.h
new file mode 100644
index 000000000..aac57f2b2
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALL2Pool2D.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H
+#define LUCI_INTERPRETER_PAL_L2POOL2D_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::optimized_ops::L2Pool(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H
diff --git a/compiler/luci-interpreter/pal/linux/PALLeakyRelu.h b/compiler/luci-interpreter/pal/linux/PALLeakyRelu.h
new file mode 100644
index 000000000..e8209bae6
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALLeakyRelu.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H
+#define LUCI_INTERPRETER_PAL_LEAKYRELU_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void LeakyRelu(const tflite::LeakyReluParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H
diff --git a/compiler/luci-interpreter/pal/linux/PALLocalResponseNormalization.h b/compiler/luci-interpreter/pal/linux/PALLocalResponseNormalization.h
new file mode 100644
index 000000000..54f7f0916
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALLocalResponseNormalization.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H
+#define LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+LocalResponseNormalization(const tflite::LocalResponseNormalizationParams &op_params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::LocalResponseNormalization(op_params, input_shape, input_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H
diff --git a/compiler/luci-interpreter/pal/linux/PALLogSoftmax.h b/compiler/luci-interpreter/pal/linux/PALLogSoftmax.h
new file mode 100644
index 000000000..a32e3eec6
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALLogSoftmax.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LOGSOFTMAX_H
+#define LUCI_INTERPRETER_PAL_LOGSOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+ float beta)
+{
+ tflite::optimized_ops::PopulateSoftmaxLookupTable(data, input_scale, beta);
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+ // Do nothing for linux
+ (void)params;
+ (void)input_scale;
+ (void)beta;
+}
+
+static inline void LogSoftmax(const tflite::SoftmaxParams &params, float input_scale,
+ const tflite::RuntimeShape &input_shape, const uint8 *input_data,
+ const tflite::RuntimeShape &output_shape, uint8 *output_data)
+{
+ tflite::optimized_ops::LogSoftmax(params, input_scale, input_shape, input_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LOGSOFTMAX_H
diff --git a/compiler/luci-interpreter/pal/linux/PALMul.h b/compiler/luci-interpreter/pal/linux/PALMul.h
new file mode 100644
index 000000000..cfaec1b58
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALMul.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_MUL_H
+#define LUCI_INTERPRETER_PAL_MUL_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const float *input1_data, const tflite::RuntimeShape &input2_shape,
+ const float *input2_data, const tflite::RuntimeShape &output_shape,
+ float *output_data)
+{
+ tflite::optimized_ops::Mul(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+}
+
+static inline void BroadcastMul4DSlow(tflite::ArithmeticParams &params,
+ const tflite::RuntimeShape &input1_shape,
+ const float *input1_data,
+ const tflite::RuntimeShape &input2_shape,
+ const float *input2_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_MUL_H
diff --git a/compiler/luci-interpreter/pal/linux/PALNeg.h b/compiler/luci-interpreter/pal/linux/PALNeg.h
new file mode 100644
index 000000000..797ffee1b
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALNeg.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_NEG_H
+#define LUCI_INTERPRETER_PAL_NEG_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_NEG_H
diff --git a/compiler/luci-interpreter/pal/linux/PALRelu.h b/compiler/luci-interpreter/pal/linux/PALRelu.h
new file mode 100644
index 000000000..b4c715d3e
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALRelu.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RELU_H
+#define LUCI_INTERPRETER_PAL_RELU_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Relu(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::Relu(input_shape, input_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void ReluX(const tflite::ReluParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::optimized_ops::ReluX(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RELU_H
diff --git a/compiler/luci-interpreter/pal/linux/PALRelu6.h b/compiler/luci-interpreter/pal/linux/PALRelu6.h
new file mode 100644
index 000000000..bf2f91aa5
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALRelu6.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RELU6_H
+#define LUCI_INTERPRETER_PAL_RELU6_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Relu6(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::Relu6(input_shape, input_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void ReluX(const tflite::ReluParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::optimized_ops::ReluX(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RELU6_H
diff --git a/compiler/luci-interpreter/pal/linux/PALResizeBilinear.h b/compiler/luci-interpreter/pal/linux/PALResizeBilinear.h
new file mode 100644
index 000000000..7380081dc
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALResizeBilinear.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+
+#include <tensorflow/lite/kernels/internal/optimized/resize_bilinear.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeBilinear(const tflite::ResizeBilinearParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::ResizeBilinear(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
diff --git a/compiler/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h b/compiler/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h
new file mode 100644
index 000000000..74d19265b
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-interpreter/pal/linux/PALSlice.h b/compiler/luci-interpreter/pal/linux/PALSlice.h
new file mode 100644
index 000000000..640a71684
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALSlice.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SLICE_H
+#define LUCI_INTERPRETER_PAL_SLICE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Slice(const tflite::SliceParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::Slice(op_params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SLICE_H
diff --git a/compiler/luci-interpreter/pal/linux/PALSoftmax.h b/compiler/luci-interpreter/pal/linux/PALSoftmax.h
new file mode 100644
index 000000000..b197e79d1
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALSoftmax.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H
+#define LUCI_INTERPRETER_PAL_SOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+ float beta)
+{
+ tflite::optimized_ops::PopulateSoftmaxLookupTable(data, input_scale, beta);
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+ // Do nothing for linux
+ (void)params;
+ (void)input_scale;
+ (void)beta;
+}
+
+template <typename In, typename Out>
+static inline void Softmax(const tflite::SoftmaxParams &params,
+ const tflite::RuntimeShape &input_shape, const In *input_data,
+ const tflite::RuntimeShape &output_shape, Out *output_data)
+{
+ tflite::optimized_ops::Softmax(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H
diff --git a/compiler/luci-interpreter/pal/linux/PALSpaceToBatchND.h b/compiler/luci-interpreter/pal/linux/PALSpaceToBatchND.h
new file mode 100644
index 000000000..5e8de9ba3
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALSpaceToBatchND.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+SpaceToBatchND(const tflite::SpaceToBatchParams &params,
+ const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::SpaceToBatchND(
+ params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, paddings_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
diff --git a/compiler/luci-interpreter/pal/linux/PALSpaceToDepth.h b/compiler/luci-interpreter/pal/linux/PALSpaceToDepth.h
new file mode 100644
index 000000000..52d2a5bb1
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALSpaceToDepth.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::SpaceToDepth(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H
diff --git a/compiler/luci-interpreter/pal/linux/PALSplit.h b/compiler/luci-interpreter/pal/linux/PALSplit.h
new file mode 100644
index 000000000..4d8da72d8
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALSplit.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPLIT_H
+#define LUCI_INTERPRETER_PAL_SPLIT_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename Scalar>
+static inline void Split(const tflite::SplitParams &params, const tflite::RuntimeShape &input_shape,
+ const Scalar *input_data, const tflite::RuntimeShape *const *output_shapes,
+ Scalar *const *output_data)
+{
+ tflite::optimized_ops::Split(params, input_shape, input_data, output_shapes, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPLIT_H
diff --git a/compiler/luci-interpreter/pal/linux/PALSub.h b/compiler/luci-interpreter/pal/linux/PALSub.h
new file mode 100644
index 000000000..04080d619
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALSub.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SUB_H
+#define LUCI_INTERPRETER_PAL_SUB_H
+
+#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Sub(const tflite::ArithmeticParams &params,
+ const tflite::RuntimeShape &input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &input2_shape, const T *input2_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SUB_H
diff --git a/compiler/luci-interpreter/pal/linux/pal.cmake b/compiler/luci-interpreter/pal/linux/pal.cmake
index da880c64c..84349e0bf 100644
--- a/compiler/luci-interpreter/pal/linux/pal.cmake
+++ b/compiler/luci-interpreter/pal/linux/pal.cmake
@@ -1,8 +1,8 @@
macro(initialize_pal)
- nnas_find_package(TensorFlowSource EXACT 2.3.0 QUIET)
- nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.3.0 QUIET)
- nnas_find_package(TensorFlowEigenSource EXACT 2.3.0 QUIET)
- nnas_find_package(TensorFlowRuySource EXACT 2.3.0 QUIET)
+ nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
if (NOT TensorFlowSource_FOUND)
message(STATUS "Skipping luci-interpreter: TensorFlow not found")
@@ -43,7 +43,12 @@ macro(add_pal_to_target TGT)
set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
add_library(luci_interpreter_linux_pal STATIC ${PAL_SOURCES})
set_target_properties(luci_interpreter_linux_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
- target_include_directories(luci_interpreter_linux_pal SYSTEM PRIVATE "${TensorFlowSource_DIR}")
+ target_include_directories(luci_interpreter_linux_pal SYSTEM PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}"
+ )
target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_linux_pal)
endmacro()
diff --git a/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst b/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst
new file mode 100644
index 000000000..771974afe
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst
@@ -0,0 +1,56 @@
+REGISTER_KERNEL(Add)
+REGISTER_KERNEL(ArgMax)
+REGISTER_KERNEL(AveragePool2D)
+REGISTER_KERNEL(BatchToSpaceND)
+REGISTER_KERNEL(Cast)
+REGISTER_KERNEL(Concatenation)
+REGISTER_KERNEL(Conv2D)
+REGISTER_KERNEL(DepthToSpace)
+REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Div)
+REGISTER_KERNEL(Elu)
+REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(Floor)
+REGISTER_KERNEL(FloorDiv)
+REGISTER_KERNEL(Equal)
+REGISTER_KERNEL(FullyConnected)
+REGISTER_KERNEL(Greater)
+REGISTER_KERNEL(GreaterEqual)
+REGISTER_KERNEL(If)
+REGISTER_KERNEL(InstanceNorm)
+REGISTER_KERNEL(L2Normalize)
+REGISTER_KERNEL(L2Pool2D)
+REGISTER_KERNEL(LeakyRelu)
+REGISTER_KERNEL(Less)
+REGISTER_KERNEL(LessEqual)
+REGISTER_KERNEL(LogicalAnd)
+REGISTER_KERNEL(LogicalNot)
+REGISTER_KERNEL(LogicalOr)
+REGISTER_KERNEL(Logistic)
+REGISTER_KERNEL(Maximum)
+REGISTER_KERNEL(MaxPool2D)
+REGISTER_KERNEL(Minimum)
+REGISTER_KERNEL(MirrorPad)
+REGISTER_KERNEL(Mul)
+REGISTER_KERNEL(Neg)
+REGISTER_KERNEL(NotEqual)
+REGISTER_KERNEL(Pad)
+REGISTER_KERNEL(PadV2)
+REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Reshape)
+REGISTER_KERNEL(ResizeBilinear)
+REGISTER_KERNEL(ResizeNearestNeighbor)
+REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Softmax)
+REGISTER_KERNEL(SpaceToBatchND)
+REGISTER_KERNEL(SpaceToDepth)
+REGISTER_KERNEL(StridedSlice)
+REGISTER_KERNEL(Sqrt)
+REGISTER_KERNEL(Square)
+REGISTER_KERNEL(SquaredDifference)
+REGISTER_KERNEL(Squeeze)
+REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(Tanh)
+REGISTER_KERNEL(Transpose)
+REGISTER_KERNEL(TransposeConv)
+REGISTER_KERNEL(While)
diff --git a/compiler/luci-interpreter/pal/mcu/PALArgMax.h b/compiler/luci-interpreter/pal/mcu/PALArgMax.h
new file mode 100644
index 000000000..21e63296d
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALArgMax.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H
+#define LUCI_INTERPRETER_PAL_ARGMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/arg_min_max.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T1, typename T2, typename T3>
+static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data,
+ const T2 *axis, const tflite::RuntimeShape &output_shape,
+ T3 *output_data, const std::greater<T1> cmp)
+{
+ tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ARGMAX_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALBatchToSpaceND.h b/compiler/luci-interpreter/pal/mcu/PALBatchToSpaceND.h
new file mode 100644
index 000000000..4dd77ffdc
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALBatchToSpaceND.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+
+#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::BatchToSpaceND(
+ unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, crops_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALConv2d.h b/compiler/luci-interpreter/pal/mcu/PALConv2d.h
new file mode 100644
index 000000000..0a8ae4e48
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALConv2d.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/conv.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &filter_shape,
+ const float *filter_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, const tflite::RuntimeShape &output_shape,
+ float *output_data, const tflite::RuntimeShape &im2col_shape,
+ float *im2col_data)
+{
+ (void)im2col_shape;
+ (void)im2col_data;
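+  // im2col is unused by the reference float kernel; an empty shape and nullptr are passed instead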
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data,
+ tflite::RuntimeShape(), nullptr);
+}
+
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ uint8 *output_data, const tflite::RuntimeShape &im2col_shape,
+ uint8 *im2col_data)
+{
+ (void)im2col_shape;
+ (void)im2col_data;
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, im2col_shape,
+ im2col_data, nullptr);
+}
+
+static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
+ const int32_t *shifts, const tflite::RuntimeShape &input_shape,
+ const int8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ int8 *output_data, const tflite::RuntimeShape &im2col_shape,
+ int8 *im2col_data)
+{
+ (void)im2col_shape;
+ (void)im2col_data;
+ tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALDepthToSpace.h b/compiler/luci-interpreter/pal/mcu/PALDepthToSpace.h
new file mode 100644
index 000000000..8463e571e
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALDepthToSpace.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+
+#include <tensorflow/lite/kernels/internal/reference/depth_to_space.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::DepthToSpace(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALElu.h b/compiler/luci-interpreter/pal/mcu/PALElu.h
new file mode 100644
index 000000000..4089d0a0c
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALElu.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ELU_H
+#define LUCI_INTERPRETER_PAL_ELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/elu.h>
+
+namespace luci_interpreter_pal
+{
+
+static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::Elu(input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ELU_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALL2Normalize.h b/compiler/luci-interpreter/pal/mcu/PALL2Normalize.h
new file mode 100644
index 000000000..f84742a44
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALL2Normalize.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+
+#include <tensorflow/lite/kernels/internal/reference/l2normalization.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Normalization(const tflite::L2NormalizationParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::L2Normalization(op_params, input_shape, input_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALL2Pool2D.h b/compiler/luci-interpreter/pal/mcu/PALL2Pool2D.h
new file mode 100644
index 000000000..38a302fc6
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALL2Pool2D.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H
+#define LUCI_INTERPRETER_PAL_L2POOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::reference_ops::L2Pool(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALLeakyRelu.h b/compiler/luci-interpreter/pal/mcu/PALLeakyRelu.h
new file mode 100644
index 000000000..9ccd2224f
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALLeakyRelu.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H
+#define LUCI_INTERPRETER_PAL_LEAKYRELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h>
+
+namespace luci_interpreter_pal
+{
+static inline void LeakyRelu(const tflite::LeakyReluParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALMul.h b/compiler/luci-interpreter/pal/mcu/PALMul.h
new file mode 100644
index 000000000..2b46b100c
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALMul.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_MUL_H
+#define LUCI_INTERPRETER_PAL_MUL_H
+
+#include <tensorflow/lite/kernels/internal/reference/mul.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const float *input1_data, const tflite::RuntimeShape &input2_shape,
+ const float *input2_data, const tflite::RuntimeShape &output_shape,
+ float *output_data)
+{
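+  // The reference PAL reuses the broadcast kernel even for the plain element-wise case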
+ tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+
+static inline void BroadcastMul4DSlow(tflite::ArithmeticParams &params,
+ const tflite::RuntimeShape &input1_shape,
+ const float *input1_data,
+ const tflite::RuntimeShape &input2_shape,
+ const float *input2_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_MUL_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALNeg.h b/compiler/luci-interpreter/pal/mcu/PALNeg.h
new file mode 100644
index 000000000..be5903a0c
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALNeg.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_NEG_H
+#define LUCI_INTERPRETER_PAL_NEG_H
+
+#include <tensorflow/lite/kernels/internal/reference/neg.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_NEG_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALResizeBilinear.h b/compiler/luci-interpreter/pal/mcu/PALResizeBilinear.h
new file mode 100644
index 000000000..cc9f0fd54
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALResizeBilinear.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_bilinear.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeBilinear(const tflite::ResizeBilinearParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::ResizeBilinear(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h b/compiler/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h
new file mode 100644
index 000000000..f4d5a6ed3
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALSoftmax.h b/compiler/luci-interpreter/pal/mcu/PALSoftmax.h
new file mode 100644
index 000000000..9838b542d
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALSoftmax.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H
+#define LUCI_INTERPRETER_PAL_SOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/softmax.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+ float beta)
+{
+ // Do nothing for mcu
+ (void)data;
+ (void)input_scale;
+ (void)beta;
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
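+  // Precompute the fixed-point multiplier and left shift used by the quantized softmax path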
+ int32 input_beta_multiplier;
+ int input_beta_left_shift;
+ static const int kScaledDiffIntegerBits = 5;
+ tflite::PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits,
+ &input_beta_multiplier, &input_beta_left_shift);
+
+ params->input_multiplier = input_beta_multiplier;
+ params->input_left_shift = input_beta_left_shift;
+ params->diff_min =
+ -tflite::CalculateInputRadius(kScaledDiffIntegerBits, params->input_left_shift);
+}
+
+template <typename T>
+static inline void Softmax(const tflite::SoftmaxParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  // MARK: At this moment this operation is not supported on mcu
+ assert(false && "Softmax NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALSpaceToBatchND.h b/compiler/luci-interpreter/pal/mcu/PALSpaceToBatchND.h
new file mode 100644
index 000000000..fdddaa929
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALSpaceToBatchND.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+SpaceToBatchND(const tflite::SpaceToBatchParams &params,
+ const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::SpaceToBatchND(
+ params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, paddings_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALSpaceToDepth.h b/compiler/luci-interpreter/pal/mcu/PALSpaceToDepth.h
new file mode 100644
index 000000000..816b7f663
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALSpaceToDepth.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_depth.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::SpaceToDepth(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALSub.h b/compiler/luci-interpreter/pal/mcu/PALSub.h
new file mode 100644
index 000000000..ea57578c6
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALSub.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SUB_H
+#define LUCI_INTERPRETER_PAL_SUB_H
+
+#include <tensorflow/lite/kernels/internal/reference/sub.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Sub(const tflite::ArithmeticParams &params,
+ const tflite::RuntimeShape &input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &input2_shape, const T *input2_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SUB_H
diff --git a/compiler/luci-interpreter/pal/mcu/pal.cmake b/compiler/luci-interpreter/pal/mcu/pal.cmake
index 2307ac727..a479d407b 100644
--- a/compiler/luci-interpreter/pal/mcu/pal.cmake
+++ b/compiler/luci-interpreter/pal/mcu/pal.cmake
@@ -1,8 +1,8 @@
macro(initialize_pal)
- nnas_find_package(TensorFlowSource EXACT 2.3.0 QUIET)
- nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.3.0 QUIET)
- nnas_find_package(TensorFlowEigenSource EXACT 2.3.0 QUIET)
- nnas_find_package(TensorFlowRuySource EXACT 2.3.0 QUIET)
+ nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
if (NOT TensorFlowSource_FOUND)
message(STATUS "Skipping luci-interpreter: TensorFlow not found")
@@ -30,7 +30,7 @@ endmacro()
macro(add_pal_to_target TGT)
target_include_directories(${TGT} PRIVATE "${PAL}")
- target_include_directories(${TGT} SYSTEM PRIVATE
+ target_include_directories(${TGT} PRIVATE
"${TensorFlowRuySource_DIR}"
"${TensorFlowGEMMLowpSource_DIR}"
"${TensorFlowEigenSource_DIR}"
@@ -42,7 +42,12 @@ macro(add_pal_to_target TGT)
set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
add_library(luci_interpreter_mcu_pal STATIC ${PAL_SOURCES})
set_target_properties(luci_interpreter_mcu_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
- target_include_directories(luci_interpreter_mcu_pal SYSTEM PRIVATE "${TensorFlowSource_DIR}")
+ target_include_directories(luci_interpreter_mcu_pal PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}"
+ )
target_link_libraries(${TGT} PRIVATE luci_interpreter_mcu_pal)
#target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_mcu_pal)
diff --git a/compiler/luci-interpreter/src/BuddyMemoryManager.cpp b/compiler/luci-interpreter/src/BuddyMemoryManager.cpp
new file mode 100644
index 000000000..6ad1f320c
--- /dev/null
+++ b/compiler/luci-interpreter/src/BuddyMemoryManager.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/BuddyMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+BuddyMemoryManager::BuddyMemoryManager(uint8_t *memory_start, int32_t memSize)
+{
+ int32_t p = lowerLog2(memSize);
+
+ // We assume that the requested size of memory does not exceed 4 GB
+ assert(p < 32);
+ memSize = 1 << p;
+
+ _start_block = reinterpret_cast<Block *>(memory_start);
+ _start_block->size = memSize - sizeof(Block);
+ _start_block->is_free = true;
+ _start_block->self = _start_block;
+ _num_blocks = 0;
+ _size = _start_block->size;
+
+ for (auto &_free_block : _free_blocks)
+ _free_block = nullptr;
+
+ addToBlocks(_start_block, p);
+}
+
+void BuddyMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+ const size_t element_size = getDataTypeSize(tensor.element_type());
+ const int32_t num_elements = tensor.shape().num_elements();
+ auto size = num_elements * element_size;
+ auto footprint = size + sizeof(Block);
+  auto l = (footprint & (footprint - 1)) == 0
+             ? lowerLog2(footprint)
+             : lowerLog2(footprint) + 1; // round up to the next level when footprint is not a power of 2
+
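+  // Find the smallest free-list level that can hold the requested footprint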
+ while (l < 32 && !_free_blocks[l])
+ l++;
+
+ assert(l < 32);
+
+ Block *tmp;
+ tmp = _free_blocks[l];
+ removeFromBlocks(tmp, l);
+
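+  // Split the block while its half can still accommodate the request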
+ while ((tmp->size + sizeof(Block)) / 2 >= size + sizeof(Block))
+ {
+ divideBlock(tmp, l);
+ l--;
+ }
+
+ tmp->is_free = false;
+ tmp->self = tmp;
+ _num_blocks++;
+
+ auto *data = (uint8_t *)(tmp + 1);
+ tensor.set_data_buffer(data);
+}
+
+void BuddyMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+ auto data = tensor.data<void>();
+ auto *tmp = (Block *)((uint8_t *)data - sizeof(Block));
+
+ assert(tmp->self == tmp);
+
+ tmp->is_free = true;
+ addToBlocks(tmp, lowerLog2(tmp->size + sizeof(Block)));
+
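+  // Merge freed buddies upward until the block spans the whole arena or no buddy is free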
+ while (tmp)
+ if (tmp->size == _size)
+ break;
+ else
+ tmp = mergeBlock(tmp);
+
+ _num_blocks--;
+ tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/BuddyMemoryManager.test.cpp b/compiler/luci-interpreter/src/BuddyMemoryManager.test.cpp
new file mode 100644
index 000000000..29fb767b7
--- /dev/null
+++ b/compiler/luci-interpreter/src/BuddyMemoryManager.test.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/BuddyMemoryManager.h"
+#include <gtest/gtest.h>
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(BuddyMemoryManager, basic)
+{
+ auto mem_pool = std::make_unique<uint8_t[]>(200);
+ auto buddy_memory_manager = std::make_unique<BuddyMemoryManager>(mem_pool.get(), 130);
+ Tensor first_tensor(DataType::U8, Shape({8}), AffineQuantization{}, "first_tensor");
+
+ buddy_memory_manager->allocate_memory(first_tensor);
+
+ uint8_t data_1[] = {1, 2, 3, 4, 5, 6, 7, 8};
+
+ first_tensor.writeData(data_1, 8);
+ uint8_t array_1[8];
+ first_tensor.readData(array_1, 8);
+ for (int i = 0; i < 8; i++)
+ {
+ EXPECT_EQ(data_1[i], array_1[i]);
+ }
+
+ Tensor second_tensor(DataType::U8, Shape({2, 5}), AffineQuantization{}, "second_tensor");
+ buddy_memory_manager->allocate_memory(second_tensor);
+
+ uint8_t data_2[2][5] = {{11, 22, 33, 44, 55}, {12, 23, 34, 45, 56}};
+ second_tensor.writeData(data_2, 10);
+
+ uint8_t array_2[2][5];
+ second_tensor.readData(array_2, 10);
+ for (int i = 0; i < 2; i++)
+ {
+ for (int j = 0; j < 5; j++)
+ {
+ EXPECT_EQ(data_2[i][j], array_2[i][j]);
+ }
+ }
+
+ buddy_memory_manager->release_memory(first_tensor);
+ EXPECT_EQ(first_tensor.data<void>(), nullptr);
+
+ buddy_memory_manager->release_memory(second_tensor);
+ EXPECT_EQ(second_tensor.data<void>(), nullptr);
+}
+
+} // namespace
+} // namespace luci_interpreter
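
The sizes in this test are deliberately tight: the pool is 200 bytes, but the manager is asked to manage only 130 of them, and the constructor trims that request to a power of two (the computation of p falls just above the lines shown; assuming it is lowerLog2(130) = 7, memSize becomes 1 << 7 = 128 bytes). sizeof(Block) of those bytes go to the root header, and the remaining space is split into buddy blocks large enough for the 8-byte and 10-byte tensors plus one Block header each.
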
diff --git a/compiler/luci-interpreter/src/CMakeLists.txt b/compiler/luci-interpreter/src/CMakeLists.txt
index 6f34b6117..e37150336 100644
--- a/compiler/luci-interpreter/src/CMakeLists.txt
+++ b/compiler/luci-interpreter/src/CMakeLists.txt
@@ -1,13 +1,19 @@
-include(${LUCI_INTERPRETER_PAL_DIR}/pal.cmake)
+include("${LUCI_INTERPRETER_PAL_DIR}/pal.cmake")
initialize_pal()
if (NOT PAL_INITIALIZED)
+  message("PAL failed to initialize, skipping luci-interpreter")
return()
endif()
message(STATUS "LUCI INTERPRETER BEGIN")
+set(LUCI_INTERPRETER_BINARY "luci_interpreter${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_CORE "luci_interpreter_core${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_KERNELS "luci_interpreter_kernels${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_LOADER "luci_interpreter_loader${LUCI_INTERPRETER_SUFFIX}")
+
add_subdirectory(core)
message(STATUS "LUCI INTERPRETER CORE")
add_subdirectory(kernels)
@@ -19,15 +25,34 @@ message(STATUS "LUCI INTERPTER INITALIZED")
set(SOURCES
"${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/Interpreter.h"
- Interpreter.cpp)
-
-add_library(luci_interpreter SHARED ${SOURCES})
-target_include_directories(luci_interpreter PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
-target_include_directories(luci_interpreter PRIVATE "${LUCI_INTERPRETER_SOURCE_DIR}")
-target_link_libraries(luci_interpreter
- PUBLIC luci_lang luci_interpreter_loader luci_interpreter_core
+ Interpreter.cpp "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h" SimpleMemoryManager.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/TestMemoryManager.h" TestMemoryManager.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/BuddyMemoryManager.h" BuddyMemoryManager.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/StaticMemoryManager.h" StaticMemoryManager.cpp)
+
+if (NOT LUCI_INTERPRETER_STATIC)
+ add_library(${LUCI_INTERPRETER_BINARY} SHARED ${SOURCES})
+else ()
+ add_library(${LUCI_INTERPRETER_BINARY} STATIC ${SOURCES})
+endif ()
+
+set(TEST_SOURCES BuddyMemoryManager.test.cpp)
+
+target_include_directories(${LUCI_INTERPRETER_BINARY} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_include_directories(${LUCI_INTERPRETER_BINARY} PRIVATE "${LUCI_INTERPRETER_SOURCE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_BINARY}
+ PUBLIC luci_lang ${LUCI_INTERPRETER_LOADER} ${LUCI_INTERPRETER_CORE}
PRIVATE nncc_common)
-install(TARGETS luci_interpreter DESTINATION lib)
+install(TARGETS ${LUCI_INTERPRETER_BINARY} DESTINATION lib)
install(DIRECTORY include/ DESTINATION include
FILES_MATCHING PATTERN "*.h")
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(buddy_manager_test ${TEST_SOURCES})
+target_link_libraries(buddy_manager_test ${LUCI_INTERPRETER_BINARY})
diff --git a/compiler/luci-interpreter/src/Interpreter.cpp b/compiler/luci-interpreter/src/Interpreter.cpp
index b57b691d0..1b8792a6c 100644
--- a/compiler/luci-interpreter/src/Interpreter.cpp
+++ b/compiler/luci-interpreter/src/Interpreter.cpp
@@ -15,6 +15,7 @@
*/
#include "luci_interpreter/Interpreter.h"
+#include "luci_interpreter/SimpleMemoryManager.h"
#include "loader/ModuleLoader.h"
@@ -69,12 +70,25 @@ private:
} // namespace
-Interpreter::Interpreter(const luci::Module *module)
+Interpreter::Interpreter(const luci::Module *module,
+ luci_interpreter::IMemoryManager *memory_manager)
{
_runtime_to_ir = std::make_unique<RuntimeToIR>();
_event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers);
_runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get());
- ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor);
+
+ if (memory_manager == nullptr)
+ {
+ _default_memory_manager = std::make_unique<SimpleMemoryManager>();
+ _memory_manager = _default_memory_manager.get();
+ }
+ else
+ {
+ _memory_manager = memory_manager;
+ }
+
+ ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor,
+ _memory_manager);
loader.load();
}
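
With this change the memory manager becomes an explicit dependency of the interpreter: passing nullptr (or omitting the argument, if the header defaults it) keeps the old behaviour via an internally owned SimpleMemoryManager, while a caller-supplied manager must outlive the interpreter, because only a raw pointer is stored. A minimal usage sketch, assuming the headers introduced in this patch and the existing Interpreter::interpret() entry point:

    #include "luci_interpreter/BuddyMemoryManager.h"
    #include "luci_interpreter/Interpreter.h"

    void run_with_buddy(const luci::Module *module)
    {
      // Caller-owned arena; 1 MiB is only an illustrative size.
      static uint8_t arena[1 << 20];
      luci_interpreter::BuddyMemoryManager mm(arena, sizeof(arena));

      // The interpreter keeps a raw pointer to `mm`, so `mm` must not be destroyed first.
      luci_interpreter::Interpreter interpreter(module, &mm);
      interpreter.interpret();
    }
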
diff --git a/compiler/luci-interpreter/src/SimpleMemoryManager.cpp b/compiler/luci-interpreter/src/SimpleMemoryManager.cpp
new file mode 100644
index 000000000..230e39896
--- /dev/null
+++ b/compiler/luci-interpreter/src/SimpleMemoryManager.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/SimpleMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+void SimpleMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+ if (!tensor.is_allocatable())
+ {
+ return;
+ }
+ if (tensor.is_data_allocated())
+ {
+ release_memory(tensor);
+ }
+ const auto element_size = getDataTypeSize(tensor.element_type());
+ const auto num_elements = tensor.shape().num_elements();
+
+ auto *data = new uint8_t[num_elements * element_size];
+ tensor.set_data_buffer(data);
+}
+
+void SimpleMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+ if (!tensor.is_data_allocated())
+ {
+ tensor.set_data_buffer(nullptr);
+ return;
+ }
+ auto data = tensor.data<uint8_t>();
+ delete[] data;
+ tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/StaticMemoryManager.cpp b/compiler/luci-interpreter/src/StaticMemoryManager.cpp
new file mode 100644
index 000000000..73a819919
--- /dev/null
+++ b/compiler/luci-interpreter/src/StaticMemoryManager.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/StaticMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+void StaticMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+ if (!tensor.is_allocatable())
+ {
+ return;
+ }
+ int32_t offset = tensor.get_offset();
+ assert(offset >= 0);
+ auto tensor_ptr = _buffer_ptr + offset;
+ tensor.set_data_buffer(tensor_ptr);
+}
+
+void StaticMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+ tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
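
StaticMemoryManager binds tensors to precomputed offsets inside a single caller-provided buffer (_buffer_ptr): allocate_memory only attaches a pointer and release_memory only detaches it, so no allocation happens at run time. A sketch of the intent, where the pointer-taking constructor and Tensor::set_offset() are assumptions taken from the header and Tensor API, which this hunk does not show:

    #include "luci_interpreter/StaticMemoryManager.h"

    // Hypothetical planning step: offsets would normally come from an offline memory plan.
    void bind_planned_tensor(uint8_t *plan_buffer, luci_interpreter::Tensor &tensor,
                             int32_t planned_offset)
    {
      luci_interpreter::StaticMemoryManager mm(plan_buffer); // assumed constructor
      tensor.set_offset(planned_offset);  // hypothetical: offset recorded by the planner
      mm.allocate_memory(tensor);         // binds plan_buffer + offset, no heap allocation
    }
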
diff --git a/compiler/luci-interpreter/src/TestMemoryManager.cpp b/compiler/luci-interpreter/src/TestMemoryManager.cpp
new file mode 100644
index 000000000..3beeee55c
--- /dev/null
+++ b/compiler/luci-interpreter/src/TestMemoryManager.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+void TestMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+ if (!tensor.is_allocatable())
+ {
+ return;
+ }
+ if (tensor.is_data_allocated())
+ {
+ release_memory(tensor);
+ }
+ const auto element_size = getDataTypeSize(tensor.element_type());
+ const auto num_elements = tensor.shape().num_elements();
+
+ auto *data = new uint8_t[num_elements * element_size];
+ allocations.push_back(data);
+ tensor.set_data_buffer(data);
+}
+
+void TestMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+ tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
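
Note that TestMemoryManager::release_memory only detaches the buffer from the tensor; the raw pointers stay in the allocations vector, presumably so the manager, rather than each test, can free everything at once when it is destroyed. That keeps the kernel tests simple: a tensor can be released and re-allocated without any ownership bookkeeping inside the test body.
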
diff --git a/compiler/luci-interpreter/src/core/CMakeLists.txt b/compiler/luci-interpreter/src/core/CMakeLists.txt
index e576dbd94..4430cba11 100644
--- a/compiler/luci-interpreter/src/core/CMakeLists.txt
+++ b/compiler/luci-interpreter/src/core/CMakeLists.txt
@@ -9,9 +9,9 @@ set(SOURCES
RuntimeModule.h
Tensor.cpp)
-add_library(luci_interpreter_core STATIC ${SOURCES})
-set_target_properties(luci_interpreter_core PROPERTIES POSITION_INDEPENDENT_CODE ON)
-target_include_directories(luci_interpreter_core PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
-target_include_directories(luci_interpreter_core PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
-target_link_libraries(luci_interpreter_core PUBLIC luci_lang)
-target_link_libraries(luci_interpreter_core PRIVATE nncc_common)
+add_library(${LUCI_INTERPRETER_CORE} STATIC ${SOURCES})
+set_target_properties(${LUCI_INTERPRETER_CORE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC luci_lang)
+target_link_libraries(${LUCI_INTERPRETER_CORE} PRIVATE nncc_common)
diff --git a/compiler/luci-interpreter/src/core/Kernel.h b/compiler/luci-interpreter/src/core/Kernel.h
index 5cdb2e360..a7c4a4218 100644
--- a/compiler/luci-interpreter/src/core/Kernel.h
+++ b/compiler/luci-interpreter/src/core/Kernel.h
@@ -36,8 +36,8 @@ protected:
public:
virtual ~Kernel() = default;
- std::vector<const Tensor *> getInputTensors() const { return _inputs; }
- std::vector<Tensor *> getOutputTensors() const { return _outputs; }
+ const std::vector<const Tensor *> &getInputTensors() const { return _inputs; }
+ const std::vector<Tensor *> &getOutputTensors() const { return _outputs; }
// Configures the kernel.
// This function is currently called once for each kernel during interpreter construction,
diff --git a/compiler/luci-interpreter/src/core/RuntimeGraph.cpp b/compiler/luci-interpreter/src/core/RuntimeGraph.cpp
index fb0ad304b..c2f8d2ea8 100644
--- a/compiler/luci-interpreter/src/core/RuntimeGraph.cpp
+++ b/compiler/luci-interpreter/src/core/RuntimeGraph.cpp
@@ -29,8 +29,10 @@ class RuntimeGraph::TensorAllocPlan
std::vector<std::vector<Tensor *>> _alloc_plan;
std::vector<std::vector<Tensor *>> _dealloc_plan;
bool _valid = false;
+ IMemoryManager *_memory_manager;
public:
+ explicit TensorAllocPlan(IMemoryManager *memory_manager);
void invalidate() { _valid = false; }
bool isValid() const { return _valid; }
void build(const RuntimeGraph &graph);
@@ -38,6 +40,11 @@ public:
void deallocate(size_t kernel_index) const;
};
+RuntimeGraph::TensorAllocPlan::TensorAllocPlan(IMemoryManager *memory_manager)
+ : _memory_manager(memory_manager)
+{
+}
+
void RuntimeGraph::TensorAllocPlan::build(const RuntimeGraph &graph)
{
invalidate();
@@ -80,7 +87,7 @@ void RuntimeGraph::TensorAllocPlan::allocate(size_t kernel_index) const
assert(_valid && kernel_index < _alloc_plan.size());
for (Tensor *tensor : _alloc_plan[kernel_index])
{
- tensor->allocate();
+ _memory_manager->allocate_memory(*tensor);
}
}
@@ -89,16 +96,24 @@ void RuntimeGraph::TensorAllocPlan::deallocate(size_t kernel_index) const
assert(_valid && kernel_index < _dealloc_plan.size());
for (Tensor *tensor : _dealloc_plan[kernel_index])
{
- tensor->deallocate();
+ _memory_manager->release_memory(*tensor);
}
}
-RuntimeGraph::RuntimeGraph(RuntimeModule *owning_module)
- : _owning_module(owning_module), _tensor_alloc_plan(std::make_unique<TensorAllocPlan>())
+RuntimeGraph::RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager)
+ : _owning_module(owning_module), _memory_manager(memory_manager),
+ _tensor_alloc_plan(std::make_unique<TensorAllocPlan>(memory_manager))
{
}
-RuntimeGraph::~RuntimeGraph() {}
+RuntimeGraph::~RuntimeGraph()
+{
+ for (auto &tensor : _tensors)
+ {
+ if (tensor->is_data_allocated())
+ _memory_manager->release_memory(*tensor);
+ }
+}
Tensor *RuntimeGraph::addTensor(std::unique_ptr<Tensor> &&tensor)
{
@@ -121,6 +136,11 @@ void RuntimeGraph::setOutputTensors(const std::vector<Tensor *> &output_tensors)
_output_tensors = output_tensors;
}
+void RuntimeGraph::configureAllocations(Tensor *tensor)
+{
+ _memory_manager->allocate_memory(*tensor);
+}
+
void RuntimeGraph::addKernel(std::unique_ptr<Kernel> &&kernel)
{
assert(kernel != nullptr);
@@ -140,7 +160,8 @@ void RuntimeGraph::execute() const
{
for (const Tensor *input_tensor : getInputTensors())
{
- event_notifier->postTensorWrite(input_tensor);
+ if (input_tensor->is_observable())
+ event_notifier->postTensorWrite(input_tensor);
}
}
@@ -155,11 +176,10 @@ void RuntimeGraph::execute() const
// TODO The `configure` method should only be called if the outputs of an operator need to be
// resized.
kernel->configure();
-// TODO decide where to allocate memory, and uncomment/remove this if
-#if 0
- _tensor_alloc_plan->allocate(
- index); // Preallocate outputs in advance instead of relying on automatic allocation
-#endif
+
+ // Preallocate outputs in advance instead of relying on automatic allocation
+ _tensor_alloc_plan->allocate(index);
+
kernel->execute();
if (event_notifier != nullptr)
@@ -169,7 +189,7 @@ void RuntimeGraph::execute() const
for (const Tensor *tensor : kernel->getOutputTensors())
{
- if (event_notifier != nullptr)
+ if (event_notifier != nullptr && tensor->is_observable())
{
event_notifier->postTensorWrite(tensor);
}
diff --git a/compiler/luci-interpreter/src/core/RuntimeGraph.h b/compiler/luci-interpreter/src/core/RuntimeGraph.h
index 5f732025d..8184e249d 100644
--- a/compiler/luci-interpreter/src/core/RuntimeGraph.h
+++ b/compiler/luci-interpreter/src/core/RuntimeGraph.h
@@ -18,6 +18,7 @@
#define LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
#include "luci_interpreter/core/Tensor.h"
+#include "luci_interpreter/MemoryManager.h"
#include "core/Kernel.h"
#include <memory>
@@ -35,7 +36,7 @@ private:
friend class TensorAllocPlan;
public:
- explicit RuntimeGraph(RuntimeModule *owning_module);
+ explicit RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager);
~RuntimeGraph();
Tensor *addTensor(std::unique_ptr<Tensor> &&tensor);
@@ -43,6 +44,8 @@ public:
void setInputTensors(const std::vector<Tensor *> &input_tensors);
void setOutputTensors(const std::vector<Tensor *> &output_tensors);
+ void configureAllocations(Tensor *tensor);
+
const std::vector<Tensor *> &getInputTensors() const { return _input_tensors; }
const std::vector<Tensor *> &getOutputTensors() const { return _output_tensors; }
@@ -51,6 +54,7 @@ public:
void execute() const;
private:
+ IMemoryManager *_memory_manager;
RuntimeModule *_owning_module;
std::vector<std::unique_ptr<Tensor>> _tensors;
std::vector<Tensor *> _input_tensors;
diff --git a/compiler/luci-interpreter/src/core/RuntimeModule.h b/compiler/luci-interpreter/src/core/RuntimeModule.h
index dccc3a173..78873b0ec 100644
--- a/compiler/luci-interpreter/src/core/RuntimeModule.h
+++ b/compiler/luci-interpreter/src/core/RuntimeModule.h
@@ -19,6 +19,7 @@
#include "core/RuntimeGraph.h"
#include "core/EventNotifier.h"
+#include "luci_interpreter/MemoryManager.h"
#include <memory>
#include <vector>
@@ -33,9 +34,9 @@ public:
EventNotifier *getEventNotifier() const { return _event_notifier; }
- RuntimeGraph *addGraph()
+ RuntimeGraph *addGraph(IMemoryManager *memory_manager)
{
- _graphs.push_back(std::make_unique<RuntimeGraph>(this));
+ _graphs.push_back(std::make_unique<RuntimeGraph>(this, memory_manager));
return _graphs.back().get();
}
diff --git a/compiler/luci-interpreter/src/core/Tensor.cpp b/compiler/luci-interpreter/src/core/Tensor.cpp
index a9e7be0a9..3c3c5ffff 100644
--- a/compiler/luci-interpreter/src/core/Tensor.cpp
+++ b/compiler/luci-interpreter/src/core/Tensor.cpp
@@ -29,21 +29,6 @@ Tensor::Tensor(DataType element_type, Shape shape, AffineQuantization quantizati
{
}
-void Tensor::allocate()
-{
- deallocate();
- const size_t element_size = getDataTypeSize(_element_type);
- const int32_t num_elements = _shape.num_elements();
- _data = std::make_unique<uint8_t[]>(num_elements * element_size);
- _data_allocated = true;
-}
-
-void Tensor::deallocate()
-{
- _data_allocated = false;
- _data.reset();
-}
-
void Tensor::readData(void *data_ptr, size_t data_size) const
{
const size_t element_size = getDataTypeSize(element_type());
@@ -68,10 +53,6 @@ void Tensor::writeData(const void *data_ptr, size_t data_size)
std::memcpy(data<void>(), data_ptr, data_size);
}
-void Tensor::resize(const Shape &new_shape)
-{
- deallocate();
- _shape = new_shape;
-}
+void Tensor::resize(const Shape &new_shape) { _shape = new_shape; }
} // namespace luci_interpreter
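
Removing Tensor::allocate()/deallocate() turns Tensor into a pure view over manager-owned storage: resize() now only updates the shape, so a kernel that resizes its output after a buffer was bound keeps pointing at the old data until a memory manager allocates again. This is also why the runtime graph above now calls _memory_manager->allocate_memory() from the allocation plan instead of tensor->allocate().
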
diff --git a/compiler/luci-interpreter/src/kernels/Add.test.cpp b/compiler/luci-interpreter/src/kernels/Add.test.cpp
index 5ad9beb30..847b65667 100644
--- a/compiler/luci-interpreter/src/kernels/Add.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Add.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Add.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,6 +28,14 @@ namespace
using namespace testing;
+class AddTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
// for quantized Add, the error shouldn't exceed step
float GetTolerance(float min, float max)
{
@@ -34,7 +43,7 @@ float GetTolerance(float min, float max)
return kQuantizedStep;
}
-TEST(AddTest, Uint8)
+TEST_F(AddTest, Uint8)
{
std::initializer_list<int32_t> base_shape = {2, 3, 1, 2};
std::initializer_list<float> base_data = {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
@@ -57,10 +66,10 @@ TEST(AddTest, Uint8)
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f);
for (int i = 0; i < output_data.size(); i++)
{
- Tensor input1_tensor =
- makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
- Tensor input2_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first,
- quant_param.second, test_data);
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
Tensor output_tensor =
makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
@@ -69,6 +78,7 @@ TEST(AddTest, Uint8)
Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(dequantizeTensorData(output_tensor),
@@ -78,10 +88,10 @@ TEST(AddTest, Uint8)
// Re-run with exchanged inputs.
for (int i = 0; i < output_data.size(); i++)
{
- Tensor input1_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first,
- quant_param.second, test_data);
- Tensor input2_tensor =
- makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
Tensor output_tensor =
makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
@@ -90,6 +100,7 @@ TEST(AddTest, Uint8)
Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(dequantizeTensorData(output_tensor),
@@ -98,7 +109,7 @@ TEST(AddTest, Uint8)
}
}
-TEST(AddTest, Float)
+TEST_F(AddTest, Float)
{
Shape base_shape = {2, 3, 1, 2};
std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
@@ -116,8 +127,10 @@ TEST(AddTest, Float)
std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
for (size_t i = 0; i < test_shapes.size(); ++i)
{
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data);
- Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data);
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
AddParams params{};
@@ -125,6 +138,7 @@ TEST(AddTest, Float)
Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
@@ -133,8 +147,10 @@ TEST(AddTest, Float)
// Re-run with exchanged inputs.
for (size_t i = 0; i < test_shapes.size(); ++i)
{
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data);
- Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data);
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
AddParams params{};
@@ -142,6 +158,7 @@ TEST(AddTest, Float)
Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
@@ -149,7 +166,7 @@ TEST(AddTest, Float)
}
}
-TEST(AddTest, SInt16)
+TEST_F(AddTest, SInt16)
{
Shape base_shape = {2, 3, 1, 2};
std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
@@ -171,9 +188,10 @@ TEST(AddTest, SInt16)
for (size_t i = 0; i < test_shapes.size(); ++i)
{
- Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data);
- Tensor input2_tensor =
- makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0, input2_data);
+ Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data,
+ _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0,
+ input2_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0);
const float tolerance = output_tensor.scale();
@@ -182,6 +200,7 @@ TEST(AddTest, SInt16)
Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor),
@@ -193,9 +212,10 @@ TEST(AddTest, SInt16)
// Re-run with exchanged inputs and different scales.
for (size_t i = 0; i < test_shapes.size(); ++i)
{
- Tensor input1_tensor =
- makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0, input2_data);
- Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data);
+ Tensor input1_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0,
+ input2_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data,
+ _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S16, 5.0 / 32767, 0);
const float tolerance = output_tensor.scale();
@@ -204,6 +224,7 @@ TEST(AddTest, SInt16)
Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor),
@@ -214,10 +235,10 @@ TEST(AddTest, SInt16)
}
}
-TEST(AddTest, Input_Output_Type_NEG)
+TEST_F(AddTest, Input_Output_Type_NEG)
{
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
- Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2});
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
AddParams params{};
@@ -227,10 +248,10 @@ TEST(AddTest, Input_Output_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(AddTest, Invalid_Input_Type_NEG)
+TEST_F(AddTest, Invalid_Input_Type_NEG)
{
- Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1});
- Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2});
+ Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S64);
AddParams params{};
@@ -238,6 +259,7 @@ TEST(AddTest, Invalid_Input_Type_NEG)
Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
EXPECT_ANY_THROW(kernel.execute());
}
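
The same migration is applied throughout the kernel tests: plain TEST cases become TEST_F fixtures so each test gets a fresh TestMemoryManager, input tensors receive the manager through makeInputTensor, and output tensors must be allocated explicitly between configure() and execute(). A condensed skeleton of the pattern, with FooTest and SomeKernel as placeholder names:

    #include "kernels/TestUtils.h"
    #include "luci_interpreter/TestMemoryManager.h"
    #include <gtest/gtest.h>
    #include <memory>

    class FooTest : public ::testing::Test
    {
    protected:
      void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }

      std::unique_ptr<IMemoryManager> _memory_manager;
    };

    TEST_F(FooTest, Basic)
    {
      Tensor input = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
      Tensor output = makeOutputTensor(DataType::FLOAT32);

      SomeKernel kernel(&input, &output);        // placeholder kernel
      kernel.configure();
      _memory_manager->allocate_memory(output);  // outputs are no longer allocated implicitly
      kernel.execute();
    }
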
diff --git a/compiler/luci-interpreter/src/kernels/ArgMax.cpp b/compiler/luci-interpreter/src/kernels/ArgMax.cpp
index 2437d5762..6561a1783 100644
--- a/compiler/luci-interpreter/src/kernels/ArgMax.cpp
+++ b/compiler/luci-interpreter/src/kernels/ArgMax.cpp
@@ -16,7 +16,7 @@
#include "kernels/ArgMax.h"
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALArgMax.h"
namespace luci_interpreter
{
@@ -60,10 +60,10 @@ void ArgMax::configure()
void ArgMax::execute() const
{
-#define TF_LITE_ARG_MAX(data_type, axis_type, output_type) \
- tflite::optimized_ops::ArgMinMax( \
- getTensorShape(input()), getTensorData<data_type>(input()), getTensorData<axis_type>(axis()), \
- getTensorShape(output()), getTensorData<output_type>(output()), std::greater<data_type>())
+#define TF_LITE_ARG_MAX(data_type, axis_type, output_type) \
+ luci_interpreter_pal::ArgMinMax(getTensorShape(input()), getTensorData<data_type>(input()), \
+ getTensorData<axis_type>(axis()), getTensorShape(output()), \
+ getTensorData<output_type>(output()), std::greater<data_type>())
if (axis()->element_type() == DataType::S32)
{
switch (_params.output_type)
diff --git a/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp
index 3362edbf6..119c69ccf 100644
--- a/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/ArgMax.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -32,15 +33,19 @@ void Check(std::initializer_list<int32_t> input_shape,
std::initializer_list<int32_t> output_shape, std::initializer_list<T1> input_data,
std::initializer_list<int32_t> dimension_data, std::initializer_list<T2> output_data)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
constexpr DataType element_type = getElementType<T1>();
- Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
- Tensor dimension_tensor = makeInputTensor<DataType::S32>(dimension_shape, dimension_data);
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor dimension_tensor =
+ makeInputTensor<DataType::S32>(dimension_shape, dimension_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(getElementType<T2>());
ArgMaxParams params{};
params.output_type = getElementType<T2>();
ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
@@ -94,17 +99,21 @@ TYPED_TEST(ArgMaxTest, MultiDimensions)
TEST(ArgMaxTest, UnsupportedType_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4}, {
- 1, 2, 7, 8, //
- 1, 9, 7, 3, //
- });
- Tensor dimension_tensor = makeInputTensor<DataType::S32>({}, {3});
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4},
+ {
+ 1, 2, 7, 8, //
+ 1, 9, 7, 3, //
+ },
+ memory_manager.get());
+ Tensor dimension_tensor = makeInputTensor<DataType::S32>({}, {3}, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8);
ArgMaxParams params{};
params.output_type = DataType::U8;
ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
EXPECT_ANY_THROW(kernel.execute());
}
diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp
index 65ea4c09e..5545fb4d4 100644
--- a/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp
+++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp
@@ -70,6 +70,11 @@ void AveragePool2D::configure()
LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
}
+ else if (input()->element_type() == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
+ }
output()->resize({batches, output_height, output_width, depth});
}
@@ -86,6 +91,9 @@ void AveragePool2D::execute() const
case DataType::S16:
evalSInt16();
break;
+ case DataType::S8:
+ evalSInt8();
+ break;
default:
throw std::runtime_error("Unsupported type.");
}
@@ -132,6 +140,26 @@ void AveragePool2D::evalQuantized() const
getTensorData<uint8_t>(output()));
}
+void AveragePool2D::evalSInt8() const
+{
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+ tflite::PoolParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.filter_height = _params.filter_height;
+ params.filter_width = _params.filter_width;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ tflite::reference_integer_ops::AveragePool(
+ params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(output()),
+ getTensorData<int8_t>(output()));
+}
+
void AveragePool2D::evalSInt16() const
{
int32_t activation_min{};
diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.h b/compiler/luci-interpreter/src/kernels/AveragePool2D.h
index 282a58797..b98367f31 100644
--- a/compiler/luci-interpreter/src/kernels/AveragePool2D.h
+++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.h
@@ -40,6 +40,7 @@ private:
void evalFloat() const;
void evalQuantized() const;
void evalSInt16() const;
+ void evalSInt8() const;
private:
int32_t _padding_height{};
diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp
index 4d7dab86a..7ed421129 100644
--- a/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/AveragePool2D.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -26,7 +27,15 @@ namespace
using namespace testing;
-TEST(AveragePool2DTest, Float)
+class AveragePool2DTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(AveragePool2DTest, Float)
{
Shape input_shape{1, 3, 5, 1};
std::vector<float> input_data{
@@ -34,7 +43,8 @@ TEST(AveragePool2DTest, Float)
1, 2, 3, 4, 5, //
6, 7, 8, 9, 10, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pool2DParams params{};
@@ -47,6 +57,7 @@ TEST(AveragePool2DTest, Float)
AveragePool2D kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{
@@ -57,15 +68,15 @@ TEST(AveragePool2DTest, Float)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1}));
}
-TEST(AveragePool2DTest, Uint8_0)
+TEST_F(AveragePool2DTest, Uint8_0)
{
std::vector<float> input_data{
0, -6, 12, 4, //
-3, -2, 10, 7, //
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
- Tensor input_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
Pool2DParams params{};
@@ -78,13 +89,14 @@ TEST(AveragePool2DTest, Uint8_0)
AveragePool2D kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0.0, 6.0}));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1}));
}
-TEST(AveragePool2DTest, Uint8_1)
+TEST_F(AveragePool2DTest, Uint8_1)
{
std::vector<float> input_data{
0, 6, 12, 4, //
@@ -92,8 +104,8 @@ TEST(AveragePool2DTest, Uint8_1)
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
- Tensor input_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
Pool2DParams params{};
@@ -106,13 +118,14 @@ TEST(AveragePool2DTest, Uint8_1)
AveragePool2D kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({2.75, 6.0}));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1}));
}
-TEST(AveragePool2DTest, SInt16)
+TEST_F(AveragePool2DTest, SInt16)
{
Shape input_shape{1, 3, 5, 1};
std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
@@ -125,7 +138,8 @@ TEST(AveragePool2DTest, SInt16)
0, 1.5, //
4.5, 6, //
};
- Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, 0.5, 0, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, 0.5, 0, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
Pool2DParams params{};
@@ -138,13 +152,47 @@ TEST(AveragePool2DTest, SInt16)
AveragePool2D kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}
-TEST(AveragePool2DTest, Invalid_Input_Shape_NEG)
+TEST_F(AveragePool2DTest, SInt8)
+{
+ Shape input_shape{1, 4, 5, 1};
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<float> input_data{-7, -3, 0, 2, -5, 12, -15, 3, 10, 5,
+ 7, -6, -1, 9, -2, 0, -5, 11, -1, -7};
+ std::vector<float> ref_output_data{
+ 0, 2.5, //
+ 1, 1.5, //
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<int8_t>(-15.9375f, 15.9375f);
+ Tensor input_tensor = makeInputTensor<DataType::S8>(
+ input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(AveragePool2DTest, Invalid_Input_Shape_NEG)
{
Shape input_shape{1, 3, 5};
std::vector<float> input_data{
@@ -152,7 +200,8 @@ TEST(AveragePool2DTest, Invalid_Input_Shape_NEG)
1, 2, 3, 4, 5, //
6, 7, 8, 9, 10, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pool2DParams params{};
@@ -167,7 +216,7 @@ TEST(AveragePool2DTest, Invalid_Input_Shape_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(AveragePool2DTest, In_Out_Type_NEG)
+TEST_F(AveragePool2DTest, In_Out_Type_NEG)
{
Shape input_shape{1, 3, 5, 1};
std::vector<float> input_data{
@@ -175,7 +224,8 @@ TEST(AveragePool2DTest, In_Out_Type_NEG)
1, 2, 3, 4, 5, //
6, 7, 8, 9, 10, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8);
Pool2DParams params{};
@@ -190,7 +240,7 @@ TEST(AveragePool2DTest, In_Out_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(AveragePool2DTest, Quant_Param_NEG)
+TEST_F(AveragePool2DTest, Quant_Param_NEG)
{
std::vector<float> input_data{
0, -6, 12, 4, //
@@ -199,8 +249,8 @@ TEST(AveragePool2DTest, Quant_Param_NEG)
std::pair<float, int32_t> quant_param1 = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
std::pair<float, int32_t> quant_param2 = quantizationParams<uint8_t>(-7.875f, 7.875f);
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param1.first,
- quant_param1.second, input_data);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param1.first, quant_param1.second, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param2.first, quant_param2.second);
Pool2DParams params{};
diff --git a/compiler/luci-interpreter/src/kernels/BatchToSpaceND.cpp b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.cpp
index 591fcc00a..bd315ff7b 100644
--- a/compiler/luci-interpreter/src/kernels/BatchToSpaceND.cpp
+++ b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.cpp
@@ -18,7 +18,7 @@
#include "kernels/BatchToSpaceND.h"
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALBatchToSpaceND.h"
#include <stdexcept>
@@ -83,13 +83,13 @@ void BatchToSpaceND::execute() const
switch (input()->element_type())
{
case DataType::FLOAT32:
- tflite::optimized_ops::BatchToSpaceND(
+ luci_interpreter_pal::BatchToSpaceND(
getTensorShape(input()), getTensorData<float>(input()), getTensorShape(block_shape()),
getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
getTensorData<int32_t>(crops()), getTensorShape(output()), getTensorData<float>(output()));
break;
case DataType::U8:
- tflite::optimized_ops::BatchToSpaceND(
+ luci_interpreter_pal::BatchToSpaceND(
getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(block_shape()),
getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
getTensorData<int32_t>(crops()), getTensorShape(output()),
diff --git a/compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp
index a29981d17..f3a344974 100644
--- a/compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/BatchToSpaceND.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -33,14 +34,19 @@ void Check(std::initializer_list<int32_t> input_shape,
std::initializer_list<T> input_data, std::initializer_list<int32_t> block_shape_data,
std::initializer_list<int32_t> crops_data, std::initializer_list<T> output_data)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
constexpr DataType element_type = getElementType<T>();
- Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
- Tensor block_shape_tensor = makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data);
- Tensor crops_tensor = makeInputTensor<DataType::S32>(crops_shape, crops_data);
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor block_shape_tensor =
+ makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get());
+ Tensor crops_tensor =
+ makeInputTensor<DataType::S32>(crops_shape, crops_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(element_type);
BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
@@ -65,10 +71,11 @@ TYPED_TEST(BatchToSpaceNDTest, Simple)
TEST(BatchToSpaceNDTest, Invalid_Shape_NEG)
{
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({3, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
- Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2});
- Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0});
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {3, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, memory_manager.get());
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get());
+ Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0}, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
@@ -77,10 +84,11 @@ TEST(BatchToSpaceNDTest, Invalid_Shape_NEG)
TEST(BatchToSpaceNDTest, Invalid_Crops_NEG)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
- {4, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
- Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2});
- Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, -1, 0});
+ {4, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, memory_manager.get());
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get());
+ Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, -1, 0}, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
diff --git a/compiler/luci-interpreter/src/kernels/CMakeLists.txt b/compiler/luci-interpreter/src/kernels/CMakeLists.txt
index 9801e11af..1b7d0f66a 100644
--- a/compiler/luci-interpreter/src/kernels/CMakeLists.txt
+++ b/compiler/luci-interpreter/src/kernels/CMakeLists.txt
@@ -1,158 +1,27 @@
-find_package(Threads REQUIRED)
-
set(SOURCES
- Add.h
- Add.cpp
- ArgMax.h
- ArgMax.cpp
- AveragePool2D.h
- AveragePool2D.cpp
- BatchToSpaceND.h
- BatchToSpaceND.cpp
- Cast.h
- Cast.cpp
- Concatenation.h
- Concatenation.cpp
- Conv2D.h
- Conv2D.cpp
- DepthToSpace.h
- DepthToSpace.cpp
- DepthwiseConv2D.h
- DepthwiseConv2D.cpp
- Div.h
- Div.cpp
- Elu.h
- Elu.cpp
- Exp.h
- Exp.cpp
- Floor.h
- Floor.cpp
- FloorDiv.h
- FloorDiv.cpp
- Equal.h
- Equal.cpp
- FullyConnected.h
- FullyConnected.cpp
- Greater.h
- Greater.cpp
- GreaterEqual.h
- GreaterEqual.cpp
- If.h
- If.cpp
- InstanceNorm.h
- InstanceNorm.cpp
- L2Normalize.h
- L2Normalize.cpp
- L2Pool2D.h
- L2Pool2D.cpp
- LeakyRelu.h
- LeakyRelu.cpp
- Less.h
- Less.cpp
- LessEqual.h
- LessEqual.cpp
- LocalResponseNormalization.h
- LocalResponseNormalization.cpp
- LogicalAnd.h
- LogicalAnd.cpp
- LogicalNot.h
- LogicalNot.cpp
- LogicalOr.h
- LogicalOr.cpp
- Logistic.h
- Logistic.cpp
- LogSoftmax.h
- LogSoftmax.cpp
- Maximum.h
- Maximum.cpp
- MaxPool2D.h
- MaxPool2D.cpp
- Mean.h
- Mean.cpp
- Minimum.h
- Minimum.cpp
- MirrorPad.h
- MirrorPad.cpp
- Mul.h
- Mul.cpp
- Neg.h
- Neg.cpp
- NotEqual.h
- NotEqual.cpp
- Pack.h
- Pack.cpp
- Pad.h
- Pad.cpp
- PadV2.h
- PadV2.cpp
- Pow.h
- Pow.cpp
- PRelu.h
- PRelu.cpp
- Relu.h
- Relu.cpp
- Relu6.h
- Relu6.cpp
- Reshape.h
- Reshape.cpp
- ResizeBilinear.h
- ResizeBilinear.cpp
- ResizeNearestNeighbor.h
- ResizeNearestNeighbor.cpp
- ReverseV2.h
- ReverseV2.cpp
- Rsqrt.h
- Rsqrt.cpp
- Slice.h
- Slice.cpp
- Softmax.h
- Softmax.cpp
- SpaceToBatchND.h
- SpaceToBatchND.cpp
- SpaceToDepth.h
- SpaceToDepth.cpp
- Split.h
- Split.cpp
- StridedSlice.h
- StridedSlice.cpp
- Sqrt.h
- Sqrt.cpp
- Square.h
- Square.cpp
- SquaredDifference.h
- SquaredDifference.cpp
- Squeeze.h
- Squeeze.cpp
- Sub.h
- Sub.cpp
- Tanh.h
- Tanh.cpp
- Transpose.h
- Transpose.cpp
- TransposeConv.h
- TransposeConv.cpp
- Unpack.h
- Unpack.cpp
- While.h
- While.cpp)
+ BinaryOpCommon.h
+ Utils.h
+ Utils.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/TestMemoryManager.h"
+ ${LUCI_INTERPRETER_SOURCE_DIR}/TestMemoryManager.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h"
+ ${LUCI_INTERPRETER_SOURCE_DIR}/SimpleMemoryManager.cpp)
+
+macro(REGISTER_KERNEL NODE)
+ list(APPEND SOURCES "${NODE}.h")
+ list(APPEND SOURCES "${NODE}.cpp")
+endmacro(REGISTER_KERNEL)
+
+include(${KERNEL_REGISTER_FILE})
-list(APPEND SOURCES
- BinaryOpCommon.h
- Utils.h
- Utils.cpp
- ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
+add_library(${LUCI_INTERPRETER_KERNELS} STATIC ${SOURCES})
+set_target_properties(${LUCI_INTERPRETER_KERNELS} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR})
-add_library(luci_interpreter_kernels STATIC ${SOURCES})
-set_target_properties(luci_interpreter_kernels PROPERTIES POSITION_INDEPENDENT_CODE ON)
-target_include_directories(luci_interpreter_kernels PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR})
-target_include_directories(luci_interpreter_kernels SYSTEM PRIVATE
- "${TensorFlowRuySource_DIR}"
- "${TensorFlowGEMMLowpSource_DIR}"
- "${TensorFlowEigenSource_DIR}"
- "${TensorFlowSource_DIR}")
-target_link_libraries(luci_interpreter_kernels
- PUBLIC luci_interpreter_core
- PRIVATE nncc_common Threads::Threads)
+target_link_libraries(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_CORE})
+target_link_libraries(${LUCI_INTERPRETER_KERNELS} PRIVATE nncc_common)
+
+add_pal_to_target(${LUCI_INTERPRETER_KERNELS})
if(NOT ENABLE_TEST)
return()
@@ -160,75 +29,13 @@ endif(NOT ENABLE_TEST)
nnas_find_package(GTest REQUIRED)
-set(TEST_SOURCES
- Add.test.cpp
- ArgMax.test.cpp
- AveragePool2D.test.cpp
- BatchToSpaceND.test.cpp
- Cast.test.cpp
- Concatenation.test.cpp
- Conv2D.test.cpp
- DepthToSpace.test.cpp
- DepthwiseConv2D.test.cpp
- Div.test.cpp
- Elu.test.cpp
- Exp.test.cpp
- Floor.test.cpp
- FloorDiv.test.cpp
- Equal.test.cpp
- FullyConnected.test.cpp
- Greater.test.cpp
- GreaterEqual.test.cpp
- If.test.cpp
- InstanceNorm.test.cpp
- L2Normalize.test.cpp
- L2Pool2D.test.cpp
- LeakyRelu.test.cpp
- Less.test.cpp
- LessEqual.test.cpp
- LocalResponseNormalization.test.cpp
- LogicalAnd.test.cpp
- LogicalNot.test.cpp
- LogicalOr.test.cpp
- Logistic.test.cpp
- LogSoftmax.test.cpp
- Maximum.test.cpp
- MaxPool2D.test.cpp
- Mean.test.cpp
- Minimum.test.cpp
- Mul.test.cpp
- Neg.test.cpp
- NotEqual.test.cpp
- Pack.test.cpp
- Pad.test.cpp
- PadV2.test.cpp
- Pow.test.cpp
- PRelu.test.cpp
- Relu.test.cpp
- Relu6.test.cpp
- Reshape.test.cpp
- ResizeBilinear.test.cpp
- ResizeNearestNeighbor.test.cpp
- ReverseV2.test.cpp
- Rsqrt.test.cpp
- Slice.test.cpp
- Softmax.test.cpp
- SpaceToBatchND.test.cpp
- SpaceToDepth.test.cpp
- Split.test.cpp
- StridedSlice.test.cpp
- Sqrt.test.cpp
- Square.test.cpp
- SquaredDifference.test.cpp
- Squeeze.test.cpp
- Sub.test.cpp
- Tanh.test.cpp
- Transpose.test.cpp
- TransposeConv.test.cpp
- Unpack.test.cpp
- While.test.cpp)
+macro(REGISTER_KERNEL NODE)
+ list(APPEND TEST_SOURCES "${NODE}.test.cpp")
+endmacro(REGISTER_KERNEL)
+
+include(${KERNEL_REGISTER_FILE})
list(APPEND TEST_SOURCES TestUtils.h TestUtils.cpp)
-GTest_AddTest(luci_interpreter_kernels_test ${TEST_SOURCES})
-target_link_libraries(luci_interpreter_kernels_test luci_interpreter_kernels)
+GTest_AddTest(${LUCI_INTERPRETER_KERNELS}_test ${TEST_SOURCES})
+target_link_libraries(${LUCI_INTERPRETER_KERNELS}_test ${LUCI_INTERPRETER_KERNELS})
diff --git a/compiler/luci-interpreter/src/kernels/Cast.test.cpp b/compiler/luci-interpreter/src/kernels/Cast.test.cpp
index 42944628d..731260522 100644
--- a/compiler/luci-interpreter/src/kernels/Cast.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Cast.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Cast.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -30,59 +31,209 @@ template <typename T1, typename T2>
void Check(std::initializer_list<int32_t> shape, std::initializer_list<T1> input_data,
std::initializer_list<T2> output_data)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
constexpr DataType input_type = getElementType<T1>();
constexpr DataType output_type = getElementType<T2>();
- Tensor input_tensor = makeInputTensor<input_type>(shape, input_data);
+ Tensor input_tensor = makeInputTensor<input_type>(shape, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(output_type);
Cast kernel(&input_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
EXPECT_THAT(extractTensorShape(output_tensor), shape);
}
+template <typename T>
+void CheckBoolTo(std::initializer_list<int32_t> shape, std::initializer_list<bool> input_data,
+ std::initializer_list<T> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType input_type = loco::DataType::BOOL;
+ constexpr DataType output_type = getElementType<T>();
+ std::vector<typename DataTypeImpl<input_type>::Type> input_data_converted;
+ for (auto elem : input_data)
+ {
+ input_data_converted.push_back(elem);
+ }
+
+ Tensor input_tensor =
+ makeInputTensor<input_type>(shape, input_data_converted, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(output_type);
+
+ Cast kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), shape);
+}
+
template <typename T> class CastTest : public ::testing::Test
{
};
-using DataTypes = ::testing::Types<uint8_t, int32_t, int64_t>;
-TYPED_TEST_CASE(CastTest, DataTypes);
+using IntDataTypes =
+ ::testing::Types<uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t>;
+TYPED_TEST_CASE(CastTest, IntDataTypes);
TYPED_TEST(CastTest, FloatToInt)
{
Check<float, TypeParam>(/*shape=*/{1, 1, 1, 4},
/*input_data=*/
{
- 1.43f, 9.99f, 7.0f, 3.12f, //
+ 1.0f, 9.0f, 7.0f, 3.0f, //
},
/*output_data=*/
{
1, 9, 7, 3, //
});
- Check<TypeParam, TypeParam>(/*shape=*/{1, 1, 1, 4},
- /*input_data=*/
- {
- 1, 9, 7, 3, //
- },
- /*output_data=*/
- {
- 1, 9, 7, 3, //
- });
+ SUCCEED();
+}
+
+TYPED_TEST(CastTest, IntToFloat)
+{
+ Check<TypeParam, float>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1, 9, 7, 3, //
+ },
+ /*output_data=*/
+ {
+ 1.0f, 9.0f, 7.0f, 3.0f, //
+ });
+ SUCCEED();
+}
+
+template <typename T1, typename T2> void check_int()
+{
+ Check<T1, T2>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1, 9, 7, 3, //
+ },
+ /*output_data=*/
+ {
+ 1, 9, 7, 3, //
+ });
+ SUCCEED();
+}
+
+TYPED_TEST(CastTest, IntToInt)
+{
+ check_int<TypeParam, uint8_t>();
+ check_int<TypeParam, uint16_t>();
+ check_int<TypeParam, uint32_t>();
+ check_int<TypeParam, uint64_t>();
+ check_int<TypeParam, int8_t>();
+ check_int<TypeParam, int16_t>();
+ check_int<TypeParam, int32_t>();
+ check_int<TypeParam, int64_t>();
+ SUCCEED();
+}
+
+TYPED_TEST(CastTest, IntToBool)
+{
+ Check<TypeParam, bool>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1, 0, 7, 0, //
+ },
+ /*output_data=*/
+ {
+ true, false, true, false, //
+ });
+ SUCCEED();
+}
+
+TYPED_TEST(CastTest, BoolToInt)
+{
+ CheckBoolTo<TypeParam>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ true, false, false, true, //
+ },
+ /*output_data=*/
+ {
+ 1, 0, 0, 1, //
+ });
+ SUCCEED();
+}
+
+TEST(CastTest, FloatToBool)
+{
+ Check<float, bool>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1.0f, 0.0f, 7.0f, 0.0f, //
+ },
+ /*output_data=*/
+ {
+ true, false, true, false, //
+ });
+ SUCCEED();
+}
+
+TEST(CastTest, BoolToFloat)
+{
+ CheckBoolTo<float>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ true, false, false, true, //
+ },
+ /*output_data=*/
+ {
+ 1.0f, 0.0f, 0.0f, 1.0f, //
+ });
+ SUCCEED();
+}
+
+TEST(CastTest, FloatToFloat)
+{
+ Check<float, float>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1.0f, 0.0f, 7.0f, 0.0f, //
+ },
+ /*output_data=*/
+ {
+ 1.0f, 0.0f, 7.0f, 0.0f, //
+ });
+ SUCCEED();
+}
+
+TEST(CastTest, BoolToBool)
+{
+ CheckBoolTo<bool>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ true, true, false, false, //
+ },
+ /*output_data=*/
+ {
+ true, true, false, false, //
+ });
+ SUCCEED();
}
TEST(CastTest, UnsupportedType_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4}, {
- 1, 2, 7, 8, //
- 1, 9, 7, 3, //
- });
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4},
+ {
+ 1, 2, 7, 8, //
+ 1, 9, 7, 3, //
+ },
+ memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::Unknown);
Cast kernel(&input_tensor, &output_tensor);
EXPECT_ANY_THROW(kernel.configure());
+ SUCCEED();
}
} // namespace
diff --git a/compiler/luci-interpreter/src/kernels/Concatenation.cpp b/compiler/luci-interpreter/src/kernels/Concatenation.cpp
index e3376c13d..7cfdf34b9 100644
--- a/compiler/luci-interpreter/src/kernels/Concatenation.cpp
+++ b/compiler/luci-interpreter/src/kernels/Concatenation.cpp
@@ -18,7 +18,7 @@
#include "kernels/Concatenation.h"
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/concatenation.h>
#include <stdexcept>
diff --git a/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp b/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp
index ee9b7d0d3..e4b50611a 100644
--- a/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Concatenation.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -26,12 +27,22 @@ namespace
using namespace testing;
-TEST(ConcatenationTest, Float)
+class ConcatenationTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(ConcatenationTest, Float)
{
std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data);
- Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data);
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ConcatenationParams params{};
@@ -42,6 +53,10 @@ TEST(ConcatenationTest, Float)
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
kernel.configure();
+ for (auto t : kernel.getOutputTensors())
+ {
+ _memory_manager->allocate_memory(*t);
+ }
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor),
@@ -53,6 +68,7 @@ TEST(ConcatenationTest, Float)
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor),
@@ -64,6 +80,7 @@ TEST(ConcatenationTest, Float)
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor),
@@ -75,6 +92,7 @@ TEST(ConcatenationTest, Float)
Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor),
@@ -82,7 +100,7 @@ TEST(ConcatenationTest, Float)
}
}
-TEST(ConcatenationTest, Input_Number_Check_NEG)
+TEST_F(ConcatenationTest, Input_Number_Check_NEG)
{
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ConcatenationParams params{};
@@ -94,12 +112,14 @@ TEST(ConcatenationTest, Input_Number_Check_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(ConcatenationTest, Invalid_Axis_NEG)
+TEST_F(ConcatenationTest, Invalid_Axis_NEG)
{
std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data);
- Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data);
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ConcatenationParams params{};
@@ -110,12 +130,13 @@ TEST(ConcatenationTest, Invalid_Axis_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(ConcatenationTest, Mismatching_Input_Type_NEG)
+TEST_F(ConcatenationTest, Mismatching_Input_Type_NEG)
{
std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
std::vector<uint8_t> input2_data{7, 8, 9, 10, 11, 12};
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data);
- Tensor input2_tensor = makeInputTensor<DataType::U8>({2, 3}, input2_data);
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>({2, 3}, input2_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ConcatenationParams params{};
@@ -126,12 +147,14 @@ TEST(ConcatenationTest, Mismatching_Input_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(ConcatenationTest, Mismatching_Input_Dimension_Num_NEG)
+TEST_F(ConcatenationTest, Mismatching_Input_Dimension_Num_NEG)
{
std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data);
- Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({1, 2, 3}, input2_data);
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3}, input2_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ConcatenationParams params{};
@@ -142,12 +165,14 @@ TEST(ConcatenationTest, Mismatching_Input_Dimension_Num_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(ConcatenationTest, Mismatching_Input_Dimension_NEG)
+TEST_F(ConcatenationTest, Mismatching_Input_Dimension_NEG)
{
std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
std::vector<float> input2_data{7, 8, 9, 10, 11, 12, 13, 14, 15};
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data);
- Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, input2_data);
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>({3, 3}, input2_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ConcatenationParams params{};
@@ -158,12 +183,12 @@ TEST(ConcatenationTest, Mismatching_Input_Dimension_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(ConcatenationTest, Unsupported_Configure_Type_NEG)
+TEST_F(ConcatenationTest, Unsupported_Configure_Type_NEG)
{
std::vector<int8_t> input1_data{1, 2, 3, 4, 5, 6};
std::vector<int8_t> input2_data{7, 8, 9, 10, 11, 12};
- Tensor input1_tensor = makeInputTensor<DataType::S8>({2, 3}, input1_data);
- Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 3}, input2_data);
+ Tensor input1_tensor = makeInputTensor<DataType::S8>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 3}, input2_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S8);
ConcatenationParams params{};
@@ -175,12 +200,14 @@ TEST(ConcatenationTest, Unsupported_Configure_Type_NEG)
}
// TODO: Remove this test when concat w/ fused_activation is supported
-TEST(ConcatenationTest, With_Fused_Activation_NEG)
+TEST_F(ConcatenationTest, With_Fused_Activation_NEG)
{
std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data);
- Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data);
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ConcatenationParams params{};
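The recurring change across these kernel tests is a new lifecycle: tensors are built against an injected memory manager, the kernel's configure() performs shape inference only, the test then allocates the output tensor(s) explicitly, and only afterwards calls execute(). Below is a self-contained sketch of that allocate-between-configure-and-execute pattern; Tensor, IMemoryManager, TestMemoryManager and AddOneKernel here are simplified stand-ins, not the luci-interpreter classes.

#include <cassert>
#include <cstddef>
#include <memory>
#include <vector>

struct Tensor
{
  std::size_t size_in_elements; // set by configure()
  std::vector<float> data;      // filled only after allocation
};

struct IMemoryManager
{
  virtual ~IMemoryManager() = default;
  virtual void allocate_memory(Tensor &t) = 0;
};

struct TestMemoryManager : IMemoryManager
{
  void allocate_memory(Tensor &t) override { t.data.resize(t.size_in_elements); }
};

// A toy "kernel": configure() only computes the output size; execute() assumes memory exists.
struct AddOneKernel
{
  const Tensor *input;
  Tensor *output;
  void configure() { output->size_in_elements = input->size_in_elements; }
  void execute() const
  {
    assert(output->data.size() == output->size_in_elements && "allocate before execute");
    for (std::size_t i = 0; i < input->data.size(); ++i)
      output->data[i] = input->data[i] + 1.0f;
  }
};

int main()
{
  std::unique_ptr<IMemoryManager> mm = std::make_unique<TestMemoryManager>();
  Tensor in{4, {1.0f, 2.0f, 3.0f, 4.0f}};
  Tensor out{0, {}};
  AddOneKernel kernel{&in, &out};
  kernel.configure();       // shape inference only
  mm->allocate_memory(out); // allocation is now the caller's responsibility
  kernel.execute();
  assert(out.data[3] == 5.0f);
  return 0;
}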
diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.cpp
index 56ca96a34..fb5e063a9 100644
--- a/compiler/luci-interpreter/src/kernels/Conv2D.cpp
+++ b/compiler/luci-interpreter/src/kernels/Conv2D.cpp
@@ -19,7 +19,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
+#include "PALConv2d.h"
#include <stdexcept>
#include <thread>
@@ -30,8 +30,8 @@ namespace kernels
{
Conv2D::Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
- const Conv2DParams &params)
- : KernelWithParams<Conv2DParams>({input, filter, bias}, {output}, params)
+ Tensor *im2col, const Conv2DParams &params)
+ : KernelWithParams<Conv2DParams>({input, filter, bias}, {output, im2col}, params)
{
}
@@ -45,7 +45,7 @@ void Conv2D::configure()
// (3) | uint8 uint8 int32 uint8 | quantized
// (4) | int8 int8 int32 int8 | quantized per channel
//
- // We only support (1) and (3) for now, and additionally the following:
+ // We only support (1), (3) and (4) for now, and additionally the following:
// | input filter bias output |
// ----+---------------------------+
// (5) | int16 int16 int64 int16 |
@@ -58,6 +58,17 @@ void Conv2D::configure()
{
LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
}
+ else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
+ LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+ static_cast<size_t>(filter()->shape().dim(0)));
+ for (auto zerop : filter()->zero_points())
+ {
+ LUCI_INTERPRETER_CHECK(zerop == 0);
+ }
+ }
else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16)
{
LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64);
@@ -103,23 +114,20 @@ void Conv2D::configure()
_params.dilation_height_factor != 1 || _params.dilation_width_factor != 1;
const bool need_non_dilated_im2col = _params.stride_height != 1 || _params.stride_width != 1 ||
filter_height != 1 || filter_width != 1;
- const bool need_im2col =
+ _need_im2col =
input()->element_type() != DataType::S16 && (need_dilated_im2col || need_non_dilated_im2col);
- if (need_im2col)
+ if (_need_im2col)
{
const int input_depth = input_shape.dim(3);
Shape im2col_shape{batches, output_height, output_width,
input_depth * filter_height * filter_width};
- try
- {
- _im2col =
- std::make_unique<Tensor>(input()->element_type(), im2col_shape, AffineQuantization{}, "");
- }
- catch (std::bad_alloc &ba)
- {
- // Failed memory allocation
- _im2col = nullptr;
- }
+ auto im2col = getOutputTensors()[1];
+ im2col->resize(im2col_shape);
+ }
+ else
+ {
+ auto im2col = getOutputTensors()[1];
+ im2col->set_allocatable(false);
}
}
@@ -147,14 +155,15 @@ void Conv2D::execute() const
evalQuantizedPerChannel();
}
break;
+ case DataType::S8:
+ evalQuantizedS8PerChannel();
+ break;
case DataType::S16:
evalQuantizedS16();
break;
default:
throw std::runtime_error("Unsupported type.");
}
- if (!!_im2col)
- _im2col->deallocate();
}
void Conv2D::evalFloat() const
@@ -173,32 +182,16 @@ void Conv2D::evalFloat() const
params.float_activation_min = activation_min;
params.float_activation_max = activation_max;
- if (_im2col)
+ float *im2col_data = nullptr;
+ auto im2col = getOutputTensors()[1];
+ if (_need_im2col)
{
- try
- {
- tflite::optimized_ops::Conv(
- params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
- getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
- getTensorShape(output()), getTensorData<float>(output()), getTensorShape(_im2col.get()),
- getTensorData<float>(_im2col.get()));
- }
- catch (std::bad_alloc &ba)
- {
- // Failed memory allocation
- _im2col->deallocate();
-
- tflite::reference_ops::Conv(
- params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
- getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
- getTensorShape(output()), getTensorData<float>(output()), tflite::RuntimeShape(), nullptr);
- }
+ im2col_data = im2col->data<float>();
}
- else
- tflite::reference_ops::Conv(
- params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
- getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
- getTensorShape(output()), getTensorData<float>(output()), tflite::RuntimeShape(), nullptr);
+ luci_interpreter_pal::Conv(
+ params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
+ getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()), getTensorShape(im2col), im2col_data);
}
void Conv2D::evalQuantized() const
@@ -232,16 +225,12 @@ void Conv2D::evalQuantized() const
params.quantized_activation_min = activation_min;
params.quantized_activation_max = activation_max;
- // TODO This should only be done once (although it takes only a few microseconds).
- // Also, the user should be able to adjust the number of threads.
- auto gemmlowp_context = std::make_unique<gemmlowp::GemmContext>();
- gemmlowp_context->set_max_num_threads(static_cast<int>(std::thread::hardware_concurrency()));
-
- tflite::optimized_ops::Conv(
- params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
- getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
- getTensorShape(output()), getTensorData<uint8_t>(output()), getTensorShape(_im2col.get()),
- getTensorData<uint8_t>(_im2col.get()), gemmlowp_context.get());
+ auto im2col = getOutputTensors()[1];
+ luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(filter()), getTensorData<uint8_t>(filter()),
+ getTensorShape(bias()), getTensorData<int32_t>(bias()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()),
+ getTensorShape(im2col), getTensorData<uint8_t>(im2col));
}
void Conv2D::evalQuantizedPerChannel() const
@@ -330,6 +319,54 @@ void Conv2D::evalQuantizedPerChannel() const
}
}
+void Conv2D::evalQuantizedS8PerChannel() const
+{
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::ConvParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+  // The kernel expects the input zero point to be negated; per-channel filter zero
+  // points are required to be zero (checked in configure()).
+ params.input_offset = -input()->zero_point(); // Note the '-'.
+ params.weights_offset = 0; // Unused in tflite code
+ params.output_offset = output()->zero_point();
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ const std::vector<double> effective_output_scales =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ std::vector<ChannelQuantMultipliers> quant_multipliers =
+ quantizeMultipliers(effective_output_scales);
+
+ std::vector<int32_t> shifts;
+ std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts),
+ [](ChannelQuantMultipliers cm) { return cm.shift; });
+ std::vector<int32_t> multipliers;
+ std::transform(quant_multipliers.begin(), quant_multipliers.end(),
+ std::back_inserter(multipliers),
+ [](ChannelQuantMultipliers cm) { return cm.multiplier; });
+
+ int8_t *im2col_data = nullptr;
+ auto im2col = getOutputTensors()[1];
+ if (_need_im2col)
+ {
+ im2col_data = im2col->data<int8_t>();
+ }
+
+ luci_interpreter_pal::ConvPerChannel(
+ params, multipliers.data(), shifts.data(), getTensorShape(input()),
+ getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
+ getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
+ getTensorData<int8_t>(output()), getTensorShape(im2col), im2col_data);
+}
+
void Conv2D::evalQuantizedS16() const
{
const auto *input_data = getTensorData<int16_t>(input());
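evalQuantizedS8PerChannel first derives one effective scale per output channel, input_scale * filter_scale[c] / output_scale (via getQuantizedConvolutionMultiplers), and then turns each scale into a fixed-point multiplier/shift pair for the integer kernel. Below is a hedged, standalone sketch of the standard TFLite-style conversion that quantizeMultipliers is expected to perform; the struct and function names are illustrative and edge cases (e.g. extremely small scales) are omitted.

#include <cmath>
#include <cstdint>
#include <vector>

struct ChannelQuantMultiplier
{
  int32_t multiplier = 0; // Q31 fixed-point value
  int shift = 0;          // power-of-two exponent
};

// Convert one real multiplier (typically input_scale * filter_scale / output_scale)
// into a Q31 multiplier plus shift, the representation integer conv kernels expect.
ChannelQuantMultiplier quantizeMultiplierSketch(double real_multiplier)
{
  ChannelQuantMultiplier out;
  if (real_multiplier == 0.0)
    return out;
  int shift = 0;
  const double q = std::frexp(real_multiplier, &shift); // real = q * 2^shift, q in [0.5, 1)
  int64_t q_fixed = static_cast<int64_t>(std::llround(q * (1LL << 31)));
  if (q_fixed == (1LL << 31)) // rounding pushed the mantissa up to 1.0
  {
    q_fixed /= 2;
    ++shift;
  }
  out.multiplier = static_cast<int32_t>(q_fixed);
  out.shift = shift;
  return out;
}

// Per-channel version: one (multiplier, shift) pair per output channel.
std::vector<ChannelQuantMultiplier> quantizeMultipliersSketch(double input_scale,
                                                              double output_scale,
                                                              const std::vector<float> &filter_scales)
{
  std::vector<ChannelQuantMultiplier> result;
  result.reserve(filter_scales.size());
  for (float fs : filter_scales)
    result.push_back(quantizeMultiplierSketch(input_scale * fs / output_scale));
  return result;
}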
diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.h b/compiler/luci-interpreter/src/kernels/Conv2D.h
index 86f73c251..5f1317638 100644
--- a/compiler/luci-interpreter/src/kernels/Conv2D.h
+++ b/compiler/luci-interpreter/src/kernels/Conv2D.h
@@ -31,7 +31,7 @@ class Conv2D : public KernelWithParams<Conv2DParams>
{
public:
Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
- const Conv2DParams &params);
+ Tensor *im2col, const Conv2DParams &params);
const Tensor *input() const { return _inputs[0]; }
const Tensor *filter() const { return _inputs[1]; }
@@ -45,10 +45,11 @@ private:
void evalFloat() const;
void evalQuantized() const;
void evalQuantizedPerChannel() const;
+ void evalQuantizedS8PerChannel() const;
void evalQuantizedS16() const;
private:
- std::unique_ptr<Tensor> _im2col;
+ bool _need_im2col = false;
int32_t _padding_height{};
int32_t _padding_width{};
};
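With this change the im2col scratch buffer is no longer an internally owned std::unique_ptr<Tensor>; it is passed in as a second output so the memory manager can plan (or skip) its allocation. A small standalone sketch of the decision and shape computation performed in configure(), using plain C++ types rather than the luci-interpreter ones:

#include <array>
#include <cstdint>

struct Im2ColPlan
{
  bool needed = false;
  std::array<int32_t, 4> shape{}; // {batches, out_h, out_w, in_depth * filter_h * filter_w}
};

// Mirrors the logic in Conv2D::configure(): im2col is needed when the convolution is
// strided, dilated, or uses a non-1x1 filter, and the element type is not S16.
Im2ColPlan planIm2Col(int32_t batches, int32_t out_h, int32_t out_w, int32_t in_depth,
                      int32_t filter_h, int32_t filter_w, int32_t stride_h, int32_t stride_w,
                      int32_t dilation_h, int32_t dilation_w, bool is_s16)
{
  Im2ColPlan plan;
  const bool dilated = dilation_h != 1 || dilation_w != 1;
  const bool non_unit = stride_h != 1 || stride_w != 1 || filter_h != 1 || filter_w != 1;
  plan.needed = !is_s16 && (dilated || non_unit);
  if (plan.needed)
    plan.shape = {batches, out_h, out_w, in_depth * filter_h * filter_w};
  return plan; // when !needed, the tensor is marked non-allocatable instead
}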
diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp
index 8610a4fe6..277c280f5 100644
--- a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Conv2D.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -26,7 +27,15 @@ namespace
using namespace testing;
-TEST(Conv2DTest, Float)
+class Conv2DTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(Conv2DTest, Float)
{
Shape input_shape{1, 4, 3, 2};
Shape filter_shape{2, 2, 2, 2};
@@ -44,9 +53,13 @@ TEST(Conv2DTest, Float)
-8, -6, 7, 5, // out = 1, row = 1
};
std::vector<float> bias_data{1, 2};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Conv2DParams params{};
@@ -57,8 +70,10 @@ TEST(Conv2DTest, Float)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
kernel.configure();
+ _memory_manager->allocate_memory(im2col);
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{
@@ -70,7 +85,55 @@ TEST(Conv2DTest, Float)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
-TEST(Conv2DTest, FloatCheck)
+TEST_F(Conv2DTest, FloatPointwise)
+{
+ Shape input_shape{1, 2, 2, 2};
+ Shape filter_shape{2, 1, 1, 2};
+ Shape bias_shape{2};
+ std::vector<float> input_data{
+ 1, 2, // row = 0, col = 0
+ 3, 4, // row = 0, col = 1
+ 5, 6, // row = 1, col = 0
+ 7, 8, // row = 1, col = 1
+ };
+ std::vector<float> filter_data{
+ -1, 2, // out = 0
+ -3, 4, // out = 1
+ };
+ std::vector<float> bias_data{1, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 1;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(im2col);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 4, 7, 6, 9, // row = 0
+ 8, 11, 10, 13, // row = 1
+ };
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(Conv2DTest, FloatCheck)
{
Shape input_shape{2, 2, 4, 1};
Shape filter_shape{3, 2, 2, 1};
@@ -89,9 +152,13 @@ TEST(Conv2DTest, FloatCheck)
-1, -1, 1, 1, // third 2x2 filter
};
std::vector<float> bias_data{1, 2, 3};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Conv2DParams params{};
@@ -102,8 +169,10 @@ TEST(Conv2DTest, FloatCheck)
params.dilation_width_factor = 1;
params.activation = Activation::NONE;
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(im2col);
kernel.execute();
std::vector<float> ref_output_data{
@@ -117,7 +186,7 @@ TEST(Conv2DTest, FloatCheck)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
-TEST(Conv2DTest, Uint8)
+TEST_F(Conv2DTest, Uint8)
{
std::vector<float> input_data{
// First batch
@@ -137,12 +206,15 @@ TEST(Conv2DTest, Uint8)
std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
- Tensor input_tensor = makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first,
- input_quant_param.second, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::U8>({3, 2, 2, 1}, input_quant_param.first,
- input_quant_param.second, filter_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::U8>({3, 2, 2, 1}, input_quant_param.first, input_quant_param.second,
+ filter_data, _memory_manager.get());
Tensor bias_tensor = makeInputTensor<DataType::S32>(
- {3}, input_quant_param.first * input_quant_param.first, 0, bias_data);
+ {3}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::U8, Shape({}), {}, "");
Tensor output_tensor =
makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
@@ -154,8 +226,10 @@ TEST(Conv2DTest, Uint8)
params.dilation_width_factor = 1;
params.activation = Activation::NONE;
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(im2col);
kernel.execute();
std::vector<float> ref_output_data{
@@ -169,7 +243,7 @@ TEST(Conv2DTest, Uint8)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
-TEST(Conv2DTest, Uint8_CWQ)
+TEST_F(Conv2DTest, Uint8_CWQ)
{
const int output_channels = 3;
std::vector<float> input_data{
@@ -209,12 +283,14 @@ TEST(Conv2DTest, Uint8_CWQ)
bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
std::vector<int32_t> zerop(output_channels, 0);
- Tensor input_tensor = makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first,
- input_quant_param.second, input_data);
- Tensor filter_tensor =
- makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, 0, filter_data);
- Tensor bias_tensor =
- makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops,
+ 0, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
+ bias_data, _memory_manager.get());
+ Tensor im2col(DataType::U8, Shape({}), {}, "");
Tensor output_tensor =
makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
@@ -226,8 +302,10 @@ TEST(Conv2DTest, Uint8_CWQ)
params.dilation_width_factor = 1;
params.activation = Activation::NONE;
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(im2col);
kernel.execute();
std::vector<float> ref_output_data{
@@ -241,7 +319,83 @@ TEST(Conv2DTest, Uint8_CWQ)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
-TEST(Conv2DTest, SInt16)
+TEST_F(Conv2DTest, SInt8_CWQ)
+{
+ const int output_channels = 3;
+ std::vector<float> input_data{
+ // First batch
+ 1, 1, 1, 1, // row = 1
+ 2, 2, 2, 2, // row = 2
+ // Second batch
+ 1, 2, 3, 4, // row = 1
+ 1, 2, 3, 4, // row = 2
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, // first 2x2 filter
+ -1, 1, -1, 1, // second 2x2 filter
+ -1, -1, 1, 1, // third 2x2 filter
+ };
+ std::vector<float> bias_data{1, 2, 3};
+ Shape filter_shape{output_channels, 2, 2, 1};
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(0, 4);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
+
+ std::vector<std::pair<float, int32_t>> filter_quant_params;
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.5, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.25, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.125, 0));
+
+ std::vector<float> filter_scales;
+ std::vector<int32_t> filter_zerops;
+ for (auto iter : filter_quant_params)
+ {
+ filter_scales.push_back(iter.first);
+ filter_zerops.push_back(iter.second);
+ }
+
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+ std::vector<int32_t> zerop(output_channels, 0);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::S8>(filter_shape, filter_scales, filter_zerops,
+ 0, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
+ bias_data, _memory_manager.get());
+ Tensor im2col(DataType::S8, Shape({}), {}, "");
+ Tensor output_tensor =
+ makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::NONE;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(im2col);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 18, 2, 5, // first batch, left
+ 18, 2, 5, // first batch, right
+ 17, 4, 3, // second batch, left
+ 37, 4, 3, // second batch, right
+ };
+ std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(Conv2DTest, SInt16)
{
Shape input_shape{1, 4, 3, 2};
Shape filter_shape{2, 2, 2, 2};
@@ -266,9 +420,13 @@ TEST(Conv2DTest, SInt16)
0, 40, 0, 44, // row = 1
};
- Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::S16, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
Conv2DParams params{};
@@ -279,15 +437,17 @@ TEST(Conv2DTest, SInt16)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(im2col);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}
-TEST(Conv2DTest, SInt16_CWQ_weights)
+TEST_F(Conv2DTest, SInt16_CWQ_weights)
{
Shape input_shape{1, 2, 2, 2}; // Batch x H x W x C
Shape filter_shape{3, 1, 1, 2}; // Out channels x H x W x In Channels
@@ -321,10 +481,13 @@ TEST(Conv2DTest, SInt16_CWQ_weights)
bias_scales.push_back(filter_scales[i] * input_scale);
std::vector<int32_t> zerop = {0, 0, 0};
- Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data);
- Tensor filter_tensor =
- makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, _memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0,
+ filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
+ _memory_manager.get());
+ Tensor im2col(DataType::S16, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0);
Conv2DParams params{};
@@ -335,15 +498,17 @@ TEST(Conv2DTest, SInt16_CWQ_weights)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(im2col);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}
-TEST(Conv2DTest, Unsupported_Type_Configure_NEG)
+TEST_F(Conv2DTest, Unsupported_Type_Configure_NEG)
{
Shape input_shape{1, 4, 3, 2};
Shape filter_shape{2, 2, 2, 2};
@@ -361,9 +526,13 @@ TEST(Conv2DTest, Unsupported_Type_Configure_NEG)
-8, -6, 7, 5, // out = 1, row = 1
};
std::vector<float> bias_data{1, 2};
- Tensor input_tensor = makeInputTensor<DataType::S32>(input_shape, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Conv2DParams params{};
@@ -374,11 +543,11 @@ TEST(Conv2DTest, Unsupported_Type_Configure_NEG)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(Conv2DTest, Invalid_Bias_Type_NEG)
+TEST_F(Conv2DTest, Invalid_Bias_Type_NEG)
{
Shape input_shape{1, 4, 3, 2};
Shape filter_shape{2, 2, 2, 2};
@@ -396,9 +565,12 @@ TEST(Conv2DTest, Invalid_Bias_Type_NEG)
-8, -6, 7, 5, // out = 1, row = 1
};
std::vector<uint8_t> bias_data{1, 2};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::U8>(bias_shape, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::U8>(bias_shape, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Conv2DParams params{};
@@ -409,11 +581,11 @@ TEST(Conv2DTest, Invalid_Bias_Type_NEG)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(Conv2DTest, Invalid_Bias_Data_NEG)
+TEST_F(Conv2DTest, Invalid_Bias_Data_NEG)
{
Shape input_shape{1, 4, 3, 2};
Shape filter_shape{2, 2, 2, 2};
@@ -431,9 +603,13 @@ TEST(Conv2DTest, Invalid_Bias_Data_NEG)
-8, -6, 7, 5, // out = 1, row = 1
};
std::vector<float> bias_data{1, 2, 3};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Conv2DParams params{};
@@ -444,11 +620,11 @@ TEST(Conv2DTest, Invalid_Bias_Data_NEG)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(Conv2DTest, Invalid_Input_Shape_NEG)
+TEST_F(Conv2DTest, Invalid_Input_Shape_NEG)
{
Shape input_shape{1, 4, 6, 1};
Shape filter_shape{2, 2, 2, 2};
@@ -466,9 +642,13 @@ TEST(Conv2DTest, Invalid_Input_Shape_NEG)
-8, -6, 7, 5, // out = 1, row = 1
};
std::vector<float> bias_data{1, 2};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Conv2DParams params{};
@@ -479,7 +659,7 @@ TEST(Conv2DTest, Invalid_Input_Shape_NEG)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
EXPECT_ANY_THROW(kernel.configure());
}
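The quantized tests above obtain (scale, zero_point) pairs from a float range through quantizationParams<T>(f_min, f_max). A hedged sketch of the usual asymmetric derivation such a helper is expected to implement follows; it is simplified, and the real helper presumably also nudges the range so that 0.0f stays exactly representable.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <utility>

// Map the float interval [f_min, f_max] onto the full range of integer type T.
// scale = (f_max - f_min) / (q_max - q_min); zero_point places f_min at q_min,
// rounded and clamped into the integer range.
template <typename T> std::pair<float, int32_t> quantizationParamsSketch(float f_min, float f_max)
{
  const int32_t q_min = std::numeric_limits<T>::min();
  const int32_t q_max = std::numeric_limits<T>::max();
  const float scale = (f_max - f_min) / static_cast<float>(q_max - q_min);
  int32_t zero_point = static_cast<int32_t>(std::round(q_min - f_min / scale));
  zero_point = std::max(q_min, std::min(q_max, zero_point));
  return {scale, zero_point};
}

// Example: quantizationParamsSketch<uint8_t>(-63.5f, 64.0f) yields scale = 0.5 and
// zero_point = 127, so the real value 0.0f maps exactly to the quantized value 127.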
diff --git a/compiler/luci-interpreter/src/kernels/DepthToSpace.cpp b/compiler/luci-interpreter/src/kernels/DepthToSpace.cpp
index f2b9e4ccc..3a9acd1d4 100644
--- a/compiler/luci-interpreter/src/kernels/DepthToSpace.cpp
+++ b/compiler/luci-interpreter/src/kernels/DepthToSpace.cpp
@@ -16,7 +16,7 @@
#include "DepthToSpace.h"
#include "Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALDepthToSpace.h"
namespace luci_interpreter
{
@@ -62,14 +62,14 @@ void DepthToSpace::execute() const
switch (input()->element_type())
{
case DataType::FLOAT32:
- tflite::optimized_ops::DepthToSpace(op_params, getTensorShape(input()),
- getTensorData<float>(input()), getTensorShape(output()),
- getTensorData<float>(output()));
+ luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
break;
case DataType::U8:
- tflite::optimized_ops::DepthToSpace(op_params, getTensorShape(input()),
- getTensorData<uint8_t>(input()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
break;
default:
throw std::runtime_error("Unsupported Type.");
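DepthToSpace, like Conv2D above, now calls a luci_interpreter_pal:: wrapper instead of including the TFLite optimized-ops header directly, which lets each platform directory (e.g. pal/linux in the file list) pick the backing implementation. The actual PALDepthToSpace.h is not shown in this hunk; a plausible shape for such a forwarding header, assuming the linux PAL simply delegates to the optimized TFLite kernel the old code called, would be:

// Hypothetical sketch of a pal/linux forwarding header -- the real PALDepthToSpace.h may differ.
#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_SKETCH_H
#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_SKETCH_H

#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>

namespace luci_interpreter_pal
{

template <typename T>
static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params,
                                const tflite::RuntimeShape &input_shape, const T *input_data,
                                const tflite::RuntimeShape &output_shape, T *output_data)
{
  // On linux the optimized TFLite kernel is affordable; a microcontroller PAL
  // would forward to the reference implementation instead.
  tflite::optimized_ops::DepthToSpace(op_params, input_shape, input_data, output_shape,
                                      output_data);
}

} // namespace luci_interpreter_pal

#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_SKETCH_H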
diff --git a/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp b/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp
index 3dee4ad36..9b1c09ba9 100644
--- a/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/DepthToSpace.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -35,12 +36,14 @@ TYPED_TEST_CASE(DepthToSpaceTest, DataTypes);
TYPED_TEST(DepthToSpaceTest, SimpleCase)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
std::vector<TypeParam> input_data{1, 2, 3, 4, 5, 6, 7, 8};
Shape input_shape{1, 1, 2, 4};
std::vector<TypeParam> output_data{1, 2, 5, 6, 3, 4, 7, 8};
std::vector<int32_t> output_shape{1, 2, 4, 1};
- Tensor input_tensor = makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
DepthToSpaceParams params{};
@@ -48,6 +51,7 @@ TYPED_TEST(DepthToSpaceTest, SimpleCase)
DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
@@ -57,10 +61,12 @@ TYPED_TEST(DepthToSpaceTest, SimpleCase)
TEST(DepthToSpaceTest, InvalidInputShape_NEG)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8};
Shape input_shape{1, 2, 4};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
DepthToSpaceParams params{};
@@ -72,10 +78,12 @@ TEST(DepthToSpaceTest, InvalidInputShape_NEG)
TEST(DepthToSpaceTest, InOutTypeMismatch_NEG)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8};
Shape input_shape{1, 1, 2, 4};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8);
DepthToSpaceParams params{};
@@ -87,10 +95,12 @@ TEST(DepthToSpaceTest, InOutTypeMismatch_NEG)
TEST(DepthToSpaceTest, InvalidBlockSize_NEG)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8};
Shape input_shape{1, 1, 2, 4};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
DepthToSpaceParams params{};
diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
index 1452f4421..f2dbf6c68 100644
--- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
+++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
@@ -20,6 +20,7 @@
#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
#include <stdexcept>
@@ -45,7 +46,7 @@ void DepthwiseConv2D::configure()
// (4) | int8 int8 int32 int8 | quantized per channel
// (5) | int16 int8 int64 int16 | quantized per channel 16x8
//
- // We only support (1) and (3) for now, and additionally the following:
+ // We only support (1), (3) and (4) for now, and additionally the following:
// | input filter bias output |
// ----+---------------------------+
// (5) | int16 int16 int64 int16 |
@@ -58,6 +59,17 @@ void DepthwiseConv2D::configure()
{
LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
}
+ else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(static_cast<uint32_t>(filter()->shape().dim(3)) ==
+ filter()->scales().size());
+ for (auto zerop : filter()->zero_points())
+ {
+ LUCI_INTERPRETER_CHECK(zerop == 0);
+ }
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
+ }
else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16)
{
LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64);
@@ -123,6 +135,9 @@ void DepthwiseConv2D::execute() const
evalQuantizedPerChannel();
}
break;
+ case DataType::S8:
+ evalQuantizedS8PerChannel();
+ break;
case DataType::S16:
evalQuantizedS16();
break;
@@ -283,6 +298,52 @@ void DepthwiseConv2D::evalQuantized() const
getTensorShape(output()), getTensorData<uint8_t>(output()));
}
+void DepthwiseConv2D::evalQuantizedS8PerChannel() const
+{
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::DepthwiseParams params{};
+
+ params.padding_type = tflite::PaddingType::kSame;
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+ params.depth_multiplier = _params.depth_multiplier;
+ // The kernel expects input and filter zero points to be negated.
+ params.input_offset = -input()->zero_point(); // Note the '-'.
+ params.weights_offset = 0;
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = 1; // unused in tflite code
+ params.output_shift = 0; // unused in tflite code
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ const std::vector<double> effective_output_scales =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ std::vector<ChannelQuantMultipliers> quant_multipliers =
+ quantizeMultipliers(effective_output_scales);
+
+ std::vector<int32_t> shifts;
+ std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts),
+ [](ChannelQuantMultipliers cm) { return cm.shift; });
+ std::vector<int32_t> multipliers;
+ std::transform(quant_multipliers.begin(), quant_multipliers.end(),
+ std::back_inserter(multipliers),
+ [](ChannelQuantMultipliers cm) { return cm.multiplier; });
+
+ tflite::reference_integer_ops::DepthwiseConvPerChannel(
+ params, multipliers.data(), shifts.data(), getTensorShape(input()),
+ getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
+ getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
+ getTensorData<int8_t>(output()));
+}
+
void DepthwiseConv2D::evalQuantizedS16() const
{
const auto *input_data = getTensorData<int16_t>(input());
diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h
index 6d700dd0f..6cffd6583 100644
--- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h
+++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h
@@ -43,6 +43,7 @@ private:
void evalFloat() const;
void evalQuantized() const;
void evalQuantizedPerChannel() const;
+ void evalQuantizedS8PerChannel() const;
void evalQuantizedS16() const;
private:
diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
index 3e2f434dd..74975899a 100644
--- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/DepthwiseConv2D.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -26,7 +27,15 @@ namespace
using namespace testing;
-TEST(DepthwiseConv2DTest, Float)
+class DepthwiseConv2DTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(DepthwiseConv2DTest, Float)
{
Shape input_shape{1, 4, 2, 2};
Shape filter_shape{1, 2, 2, 4};
@@ -44,9 +53,12 @@ TEST(DepthwiseConv2DTest, Float)
13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
DepthwiseConv2DParams params{};
@@ -60,6 +72,7 @@ TEST(DepthwiseConv2DTest, Float)
DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{
@@ -70,7 +83,7 @@ TEST(DepthwiseConv2DTest, Float)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4}));
}
-TEST(DepthwiseConv2DTest, Uint8)
+TEST_F(DepthwiseConv2DTest, Uint8)
{
std::vector<float> input_data{
1, 2, 7, 8, // column 1
@@ -88,12 +101,14 @@ TEST(DepthwiseConv2DTest, Uint8)
std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 3, 2, 2}, input_quant_param.first,
- input_quant_param.second, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::U8>({1, 2, 2, 4}, input_quant_param.first,
- input_quant_param.second, filter_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 3, 2, 2}, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 2, 4}, input_quant_param.first, input_quant_param.second,
+ filter_data, _memory_manager.get());
Tensor bias_tensor = makeInputTensor<DataType::S32>(
- {4}, input_quant_param.first * input_quant_param.first, 0, bias_data);
+ {4}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get());
Tensor output_tensor =
makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
@@ -108,6 +123,7 @@ TEST(DepthwiseConv2DTest, Uint8)
DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{
@@ -118,7 +134,7 @@ TEST(DepthwiseConv2DTest, Uint8)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4}));
}
-TEST(DepthwiseConv2DTest, SInt16)
+TEST_F(DepthwiseConv2DTest, SInt16)
{
Shape input_shape{1, 4, 2, 2};
Shape filter_shape{1, 2, 2, 4};
@@ -143,9 +159,12 @@ TEST(DepthwiseConv2DTest, SInt16)
167, 0, 227, 28, //
};
- Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
DepthwiseConv2DParams params{};
@@ -159,13 +178,14 @@ TEST(DepthwiseConv2DTest, SInt16)
DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}
-TEST(DepthwiseConv2DTest, SInt16_CWQ_weights)
+TEST_F(DepthwiseConv2DTest, SInt16_CWQ_weights)
{
const int output_channels = 4;
Shape input_shape{1, 4, 2, 2};
@@ -197,10 +217,12 @@ TEST(DepthwiseConv2DTest, SInt16_CWQ_weights)
for (int i = 0; i < output_channels; ++i)
bias_scales.push_back(filter_scales[i] * input_scale);
std::vector<int32_t> zerop(4, 0);
- Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data);
- Tensor filter_tensor =
- makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 3, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, _memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 3,
+ filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
+ _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
DepthwiseConv2DParams params{};
@@ -214,13 +236,14 @@ TEST(DepthwiseConv2DTest, SInt16_CWQ_weights)
DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}
-TEST(DepthwiseConv2DTest, Uint8_CWQ_weights)
+TEST_F(DepthwiseConv2DTest, Uint8_CWQ_weights)
{
const int output_channels = 4;
Shape input_shape{1, 3, 2, 2};
@@ -267,11 +290,13 @@ TEST(DepthwiseConv2DTest, Uint8_CWQ_weights)
bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
std::vector<int32_t> zerop(output_channels, 0);
- Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first,
- input_quant_param.second, input_data);
- Tensor filter_tensor =
- makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, 3, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops,
+ 3, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data,
+ _memory_manager.get());
Tensor output_tensor =
makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
@@ -286,6 +311,7 @@ TEST(DepthwiseConv2DTest, Uint8_CWQ_weights)
DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
@@ -293,7 +319,83 @@ TEST(DepthwiseConv2DTest, Uint8_CWQ_weights)
FloatArrayNear(ref_output_data, output_quant_param.first));
}
-TEST(DepthwiseConv2DTest, InvalidBiasType_NEG)
+TEST_F(DepthwiseConv2DTest, SInt8_CWQ_weights)
+{
+ const int output_channels = 4;
+ Shape input_shape{1, 3, 2, 2};
+ Shape filter_shape{1, 2, 2, output_channels};
+ Shape bias_shape{4};
+ std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels};
+
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ std::vector<float> ref_output_data{
+ 71, -34, 99, -20, //
+ 91, -26, 127, -4, //
+ };
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-128, 127);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
+
+ std::vector<std::pair<float, int32_t>> filter_quant_params;
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.5, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.25, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(1, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.125, 0));
+
+ std::vector<float> filter_scales;
+ std::vector<int32_t> filter_zerops;
+ for (auto iter : filter_quant_params)
+ {
+ filter_scales.push_back(iter.first);
+ filter_zerops.push_back(iter.second);
+ }
+
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+ std::vector<int32_t> zerop(output_channels, 0);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::S8>(filter_shape, filter_scales, filter_zerops,
+ 3, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data,
+ _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::NONE;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, output_quant_param.first));
+}
+
+TEST_F(DepthwiseConv2DTest, InvalidBiasType_NEG)
{
Shape input_shape{1, 4, 2, 2};
Shape filter_shape{1, 2, 2, 4};
@@ -311,9 +413,11 @@ TEST(DepthwiseConv2DTest, InvalidBiasType_NEG)
13, -14, 15, -16, //
};
std::vector<int32_t> bias_data{1, 2, 3, 4};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
DepthwiseConv2DParams params{};
@@ -329,7 +433,7 @@ TEST(DepthwiseConv2DTest, InvalidBiasType_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(DepthwiseConv2DTest, InOutTypeMismatch_NEG)
+TEST_F(DepthwiseConv2DTest, InOutTypeMismatch_NEG)
{
Shape input_shape{1, 4, 2, 2};
Shape filter_shape{1, 2, 2, 4};
@@ -347,9 +451,12 @@ TEST(DepthwiseConv2DTest, InOutTypeMismatch_NEG)
13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8);
DepthwiseConv2DParams params{};
@@ -365,7 +472,7 @@ TEST(DepthwiseConv2DTest, InOutTypeMismatch_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(DepthwiseConv2DTest, InvalidInputShape_NEG)
+TEST_F(DepthwiseConv2DTest, InvalidInputShape_NEG)
{
Shape input_shape{4, 2, 2};
Shape filter_shape{2, 2, 4};
@@ -383,9 +490,12 @@ TEST(DepthwiseConv2DTest, InvalidInputShape_NEG)
13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
DepthwiseConv2DParams params{};
@@ -401,7 +511,7 @@ TEST(DepthwiseConv2DTest, InvalidInputShape_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(DepthwiseConv2DTest, InvalidFilterShape_NEG)
+TEST_F(DepthwiseConv2DTest, InvalidFilterShape_NEG)
{
Shape input_shape{1, 4, 2, 2};
Shape filter_shape{2, 1, 2, 4};
@@ -419,9 +529,12 @@ TEST(DepthwiseConv2DTest, InvalidFilterShape_NEG)
13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
DepthwiseConv2DParams params{};
@@ -437,7 +550,7 @@ TEST(DepthwiseConv2DTest, InvalidFilterShape_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(DepthwiseConv2DTest, InvalidBiasDim_NEG)
+TEST_F(DepthwiseConv2DTest, InvalidBiasDim_NEG)
{
Shape input_shape{1, 4, 2, 2};
Shape filter_shape{1, 2, 4, 2};
@@ -455,9 +568,12 @@ TEST(DepthwiseConv2DTest, InvalidBiasDim_NEG)
13, -14, 15, -16, //
};
std::vector<float> bias_data{1, 2, 3, 4};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
DepthwiseConv2DParams params{};
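
Every test file touched below follows the same migration: a gtest fixture owns a fresh TestMemoryManager, input tensors are built against it, and the output buffer is allocated between configure() and execute(). A condensed sketch of that flow, reusing the Floor kernel and the TestUtils helpers exactly as they appear in these hunks (sketch only, namespace boilerplate elided):

// Sketch -- not part of the patch. Lives, like the real tests, inside
// namespace luci_interpreter::kernels with the TestUtils helpers in scope.
#include "kernels/Floor.h"
#include "kernels/TestUtils.h"
#include "luci_interpreter/TestMemoryManager.h"

class ExampleKernelTest : public ::testing::Test
{
protected:
  // A fresh manager per test keeps allocations isolated between tests.
  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }

  std::unique_ptr<IMemoryManager> _memory_manager;
};

TEST_F(ExampleKernelTest, Pattern)
{
  // Input buffers are allocated and filled by makeInputTensor via the manager.
  Tensor input = makeInputTensor<DataType::FLOAT32>({2}, {1.5f, -0.5f}, _memory_manager.get());
  Tensor output = makeOutputTensor(DataType::FLOAT32);

  Floor kernel(&input, &output);
  kernel.configure();                       // shape/type checks, output resize
  _memory_manager->allocate_memory(output); // output buffer must exist before execute()
  kernel.execute();

  EXPECT_THAT(extractTensorData<float>(output), ::testing::ElementsAreArray({1.f, -1.f}));
}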
diff --git a/compiler/luci-interpreter/src/kernels/Div.cpp b/compiler/luci-interpreter/src/kernels/Div.cpp
index db1496d37..0e52ba1f0 100644
--- a/compiler/luci-interpreter/src/kernels/Div.cpp
+++ b/compiler/luci-interpreter/src/kernels/Div.cpp
@@ -18,7 +18,8 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/div.h>
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
namespace luci_interpreter
{
diff --git a/compiler/luci-interpreter/src/kernels/Div.test.cpp b/compiler/luci-interpreter/src/kernels/Div.test.cpp
index 1a0c4af15..021d68d06 100644
--- a/compiler/luci-interpreter/src/kernels/Div.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Div.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Div.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,6 +28,14 @@ namespace
using namespace testing;
+class DivTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
float GetTolerance(float min, float max)
{
const float kQuantizedStep = (max - min) / 255.0f;
@@ -34,7 +43,7 @@ float GetTolerance(float min, float max)
return kQuantizedTolerance;
}
-TEST(DivTest, Float)
+TEST_F(DivTest, Float)
{
Shape base_shape = {2, 3, 1, 1};
@@ -44,8 +53,10 @@ TEST(DivTest, Float)
std::vector<float> input2_data{0.2f, 1.6f, 0.5f, 0.4f, 1.6f, 0.4f};
std::vector<float> test_outputs{1.5f, 1.4375f, 1.8f, 1.25f, 0.5f, 2.75f};
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data);
- Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input2_data);
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input2_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
@@ -54,13 +65,14 @@ TEST(DivTest, Float)
Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
}
-TEST(DivTest, FloatBroadcast)
+TEST_F(DivTest, FloatBroadcast)
{
Shape input1_shape = {1, 3};
Shape input2_shape = {3, 1};
@@ -69,8 +81,10 @@ TEST(DivTest, FloatBroadcast)
std::vector<float> input2_data{0.2f, 1.6f, 0.5f};
std::vector<float> test_outputs{0.f, 11.5f, 4.5f, 0.f, 1.4375f, 0.5625f, 0.f, 4.6f, 1.8f};
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data);
- Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data);
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
@@ -79,12 +93,13 @@ TEST(DivTest, FloatBroadcast)
Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
}
-TEST(DivTest, Uint8)
+TEST_F(DivTest, Uint8)
{
Shape base_shape = {1, 2, 2, 1};
@@ -98,10 +113,10 @@ TEST(DivTest, Uint8)
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.f, 1.f);
- Tensor input1_tensor =
- makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, input1_data);
- Tensor input2_tensor =
- makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, input2_data);
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, input1_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, input2_data, _memory_manager.get());
Tensor output_tensor =
makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
@@ -111,6 +126,7 @@ TEST(DivTest, Uint8)
Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(dequantizeTensorData(output_tensor),
@@ -118,10 +134,10 @@ TEST(DivTest, Uint8)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
}
-TEST(DivTest, Input_Output_Type_NEG)
+TEST_F(DivTest, Input_Output_Type_NEG)
{
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
- Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2});
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
DivParams params{};
@@ -131,10 +147,10 @@ TEST(DivTest, Input_Output_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(DivTest, Invalid_Input_Type_NEG)
+TEST_F(DivTest, Invalid_Input_Type_NEG)
{
- Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1});
- Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2});
+ Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S64);
DivParams params{};
@@ -142,6 +158,7 @@ TEST(DivTest, Invalid_Input_Type_NEG)
Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
EXPECT_ANY_THROW(kernel.execute());
}
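
The Uint8 test above derives its tolerance from one quantized step of the [-1, 1] range. A self-contained sketch of the standard asymmetric-quantization arithmetic behind a helper like quantizationParams<uint8_t> (illustrative approximation; the in-tree TestUtils helper may round or nudge the zero point differently):

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <utility>

// Illustrative uint8 affine quantization parameters (TFLite-style); not the
// exact TestUtils implementation.
std::pair<float, int32_t> uint8QuantParamsSketch(float rmin, float rmax)
{
  const float qmin = 0.0f, qmax = 255.0f;
  const float scale = (rmax - rmin) / (qmax - qmin);       // 2/255 ~= 0.00784 for [-1, 1]
  const auto zero_point =
    static_cast<int32_t>(std::round(qmin - rmin / scale)); // ~128 for [-1, 1]
  return {scale, zero_point};
}

int main()
{
  const auto qp = uint8QuantParamsSketch(-1.f, 1.f);
  // One quantized step equals the scale; GetTolerance() above builds on it.
  std::printf("scale=%.6f zero_point=%d\n", qp.first, qp.second);
  return 0;
}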
diff --git a/compiler/luci-interpreter/src/kernels/Elu.cpp b/compiler/luci-interpreter/src/kernels/Elu.cpp
index 456396055..697d63be4 100644
--- a/compiler/luci-interpreter/src/kernels/Elu.cpp
+++ b/compiler/luci-interpreter/src/kernels/Elu.cpp
@@ -17,7 +17,7 @@
#include "kernels/Elu.h"
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALElu.h"
#include <stdexcept>
@@ -40,8 +40,8 @@ void Elu::execute() const
switch (input()->element_type())
{
case DataType::FLOAT32:
- tflite::optimized_ops::Elu(getTensorShape(input()), getTensorData<float>(input()),
- getTensorShape(output()), getTensorData<float>(output()));
+ luci_interpreter_pal::Elu(getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
break;
default:
throw std::runtime_error("Unsupported type.");
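
Elu.cpp now includes PALElu.h and calls luci_interpreter_pal::Elu instead of reaching into tflite::optimized_ops directly, so the platform layer decides which implementation backs the symbol. A plausible sketch of such a forwarding header for a linux build (an assumption for illustration, not this commit's actual file):

// Hypothetical PAL forwarder; the real PALElu.h may differ.
#ifndef LUCI_INTERPRETER_PAL_ELU_H
#define LUCI_INTERPRETER_PAL_ELU_H

#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>

namespace luci_interpreter_pal
{
static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data,
                       const tflite::RuntimeShape &output_shape, float *output_data)
{
  // A desktop/linux PAL can forward straight to the optimized TFLite kernel;
  // an MCU-oriented PAL would bind the same name to a reference implementation.
  tflite::optimized_ops::Elu(input_shape, input_data, output_shape, output_data);
}
} // namespace luci_interpreter_pal

#endif // LUCI_INTERPRETER_PAL_ELU_H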
diff --git a/compiler/luci-interpreter/src/kernels/Elu.test.cpp b/compiler/luci-interpreter/src/kernels/Elu.test.cpp
index e26eed03e..814499cdb 100644
--- a/compiler/luci-interpreter/src/kernels/Elu.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Elu.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Elu.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -29,11 +30,14 @@ using namespace testing;
void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
std::initializer_list<float> input_data, std::initializer_list<float> output_data)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Elu kernel(&input_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
(void)output_shape;
@@ -58,12 +62,14 @@ TEST(EluTest, SimpleElu)
TEST(EluTest, InOutTypeMismatch_NEG)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
0, -6, 2, -4, //
3, -2, 10, -0.1, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8);
Elu kernel(&input_tensor, &output_tensor);
diff --git a/compiler/luci-interpreter/src/kernels/Equal.h b/compiler/luci-interpreter/src/kernels/Equal.h
index 69b3be774..11f025eac 100644
--- a/compiler/luci-interpreter/src/kernels/Equal.h
+++ b/compiler/luci-interpreter/src/kernels/Equal.h
@@ -42,9 +42,9 @@ private:
private:
int32_t _x_multiplier = 0;
- int32_t _x_shift = 0;
+ int _x_shift = 0;
int32_t _y_multiplier = 0;
- int32_t _y_shift = 0;
+ int _y_shift = 0;
};
} // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/Equal.test.cpp b/compiler/luci-interpreter/src/kernels/Equal.test.cpp
index ba2827ba9..46a0f97d8 100644
--- a/compiler/luci-interpreter/src/kernels/Equal.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Equal.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Equal.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,7 +28,15 @@ namespace
using namespace testing;
-TEST(EqualTest, FloatSimple)
+class EqualTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(EqualTest, FloatSimple)
{
std::vector<float> x_data{
0.5, 0.7, 0.9, // Row 1
@@ -44,19 +53,20 @@ TEST(EqualTest, FloatSimple)
false, true, false, // Row 2
};
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
- Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Equal kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
}
-TEST(EqualTest, FloatBroardcast)
+TEST_F(EqualTest, FloatBroardcast)
{
std::vector<float> x_data{
0.5, 0.7, 0.9, // Row 1
@@ -76,12 +86,13 @@ TEST(EqualTest, FloatBroardcast)
true, true, true, // Row 4
};
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data);
- Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Equal kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
@@ -92,7 +103,7 @@ TEST(EqualTest, FloatBroardcast)
const float F_MIN = -128.0 / 128.0;
const float F_MAX = 127.0 / 128.0;
-TEST(EqualTest, Uint8Quantized)
+TEST_F(EqualTest, Uint8Quantized)
{
std::vector<float> x_data{
0.5, 0.5, 0.7, 0.9, // Row 1
@@ -110,24 +121,25 @@ TEST(EqualTest, Uint8Quantized)
};
std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
- Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2);
- Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Equal kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
}
-TEST(EqualTest, Uint8QuantizedBroadcast)
+TEST_F(EqualTest, Uint8QuantizedBroadcast)
{
std::vector<float> x_data{
0.4, -0.8, 0.7, 0.3, // Row 1
@@ -148,34 +160,35 @@ TEST(EqualTest, Uint8QuantizedBroadcast)
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
- Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 4, 4, 1}, quant_param.first, quant_param.second, x_data);
- Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 4, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Equal kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 4, 1}));
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
}
-TEST(EqualTest, Input_Type_Mismatch_NEG)
+TEST_F(EqualTest, Input_Type_Mismatch_NEG)
{
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
- Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1});
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Equal kernel(&x_tensor, &y_tensor, &output_tensor);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(EqualTest, Input_Output_Type_NEG)
+TEST_F(EqualTest, Input_Output_Type_NEG)
{
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
- Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Equal kernel(&x_tensor, &y_tensor, &output_tensor);
diff --git a/compiler/luci-interpreter/src/kernels/Exp.cpp b/compiler/luci-interpreter/src/kernels/Exp.cpp
index f7b115ab3..e7c560a88 100644
--- a/compiler/luci-interpreter/src/kernels/Exp.cpp
+++ b/compiler/luci-interpreter/src/kernels/Exp.cpp
@@ -19,7 +19,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/exp.h>
namespace luci_interpreter
{
diff --git a/compiler/luci-interpreter/src/kernels/Exp.test.cpp b/compiler/luci-interpreter/src/kernels/Exp.test.cpp
index 19b2c141a..a159d9db9 100644
--- a/compiler/luci-interpreter/src/kernels/Exp.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Exp.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Exp.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -29,13 +30,16 @@ using namespace testing;
TEST(ExpTest, Float)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
Shape input_shape{1, 1, 7};
std::vector<float> input_data{0.0f, 1.0f, -1.0f, 100.0f, -100.0f, 0.01f, -0.01f};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Exp kernel(&input_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<int32_t> ref_output_shape{1, 1, 7};
diff --git a/compiler/luci-interpreter/src/kernels/Floor.test.cpp b/compiler/luci-interpreter/src/kernels/Floor.test.cpp
index d90d611d9..30076fb54 100644
--- a/compiler/luci-interpreter/src/kernels/Floor.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Floor.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Floor.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -26,7 +27,15 @@ namespace
using namespace testing;
-TEST(FloorTest, SimpleFloat)
+class FloorTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(FloorTest, SimpleFloat)
{
std::initializer_list<int32_t> input_shape{1, 2, 4, 1};
std::vector<float> input_data{
@@ -40,20 +49,22 @@ TEST(FloorTest, SimpleFloat)
3, 7, 10, -1, // Row 2
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Floor kernel(&input_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
-TEST(FloorTest, Input_Output_Type_NEG)
+TEST_F(FloorTest, Input_Output_Type_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S32);
Floor kernel(&input_tensor, &output_tensor);
diff --git a/compiler/luci-interpreter/src/kernels/FloorDiv.test.cpp b/compiler/luci-interpreter/src/kernels/FloorDiv.test.cpp
index 16831ca80..3e1b5f18e 100644
--- a/compiler/luci-interpreter/src/kernels/FloorDiv.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/FloorDiv.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/FloorDiv.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,7 +28,15 @@ namespace
using namespace testing;
-TEST(FloorDivTest, FloatSimple)
+class FloorDivTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(FloorDivTest, FloatSimple)
{
Shape x_shape{2, 3};
std::vector<float> x_data{
@@ -47,12 +56,13 @@ TEST(FloorDivTest, FloatSimple)
1, 1, 1, // Row 2
};
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data);
- Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor),
@@ -60,7 +70,7 @@ TEST(FloorDivTest, FloatSimple)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
-TEST(FloorDivTest, FloatBroadcast)
+TEST_F(FloorDivTest, FloatBroadcast)
{
Shape x_shape{1, 3};
std::vector<float> x_data{
@@ -81,12 +91,13 @@ TEST(FloorDivTest, FloatBroadcast)
1, 3, -4, // Row 3
};
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data);
- Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor),
@@ -94,36 +105,37 @@ TEST(FloorDivTest, FloatBroadcast)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
-TEST(FloorDivTest, DivByZero_NEG)
+TEST_F(FloorDivTest, DivByZero_NEG)
{
Shape shape{3};
std::vector<float> x_data{1, 0, -1};
std::vector<float> y_data{0, 0, 0};
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(shape, x_data);
- Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(shape, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(shape, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(shape, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
EXPECT_ANY_THROW(kernel.execute());
}
-TEST(FloorDivTest, Input_Output_Type_Mismatch_NEG)
+TEST_F(FloorDivTest, Input_Output_Type_Mismatch_NEG)
{
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
- Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8);
FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(FloorDivTest, Input_Type_Mismatch_NEG)
+TEST_F(FloorDivTest, Input_Type_Mismatch_NEG)
{
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1});
- Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1});
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
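
The FloorDiv hunks show the two flavors of negative test after the memory-manager migration: type mismatches still throw from configure() and need no allocation, while runtime failures such as division by zero only throw from execute(), so the output buffer has to be allocated first. Condensed side by side (sketch reusing the FloorDivTest fixture and helpers from the hunks above):

// 1) Configure-time failure: configure() throws before any output allocation.
TEST_F(FloorDivTest, TypeMismatchSketch_NEG)
{
  Tensor x = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
  Tensor y = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
  Tensor out = makeOutputTensor(DataType::FLOAT32);

  FloorDiv kernel(&x, &y, &out);
  EXPECT_ANY_THROW(kernel.configure());
}

// 2) Execute-time failure: configure() succeeds, so the output buffer must
//    exist before the call that is expected to throw.
TEST_F(FloorDivTest, DivByZeroSketch_NEG)
{
  Tensor x = makeInputTensor<DataType::FLOAT32>({3}, {1, 0, -1}, _memory_manager.get());
  Tensor y = makeInputTensor<DataType::FLOAT32>({3}, {0, 0, 0}, _memory_manager.get());
  Tensor out = makeOutputTensor(DataType::FLOAT32);

  FloorDiv kernel(&x, &y, &out);
  kernel.configure();
  _memory_manager->allocate_memory(out);
  EXPECT_ANY_THROW(kernel.execute());
}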
diff --git a/compiler/luci-interpreter/src/kernels/FullyConnected.cpp b/compiler/luci-interpreter/src/kernels/FullyConnected.cpp
index 48433b42d..cfe8f8bf2 100644
--- a/compiler/luci-interpreter/src/kernels/FullyConnected.cpp
+++ b/compiler/luci-interpreter/src/kernels/FullyConnected.cpp
@@ -19,6 +19,7 @@
#include "kernels/Utils.h"
#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
#include <stdexcept>
@@ -48,6 +49,12 @@ void FullyConnected::configure()
LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::FLOAT32)
}
+ else if (weights()->element_type() == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S8);
+ LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::S32)
+ }
else
{
throw std::runtime_error("Unsupported type.");
@@ -77,6 +84,9 @@ void FullyConnected::execute() const
case DataType::U8:
evalQuantized();
break;
+ case DataType::S8:
+ evalQuantizedS8();
+ break;
case DataType::FLOAT32:
evalFloat();
break;
@@ -135,5 +145,38 @@ void FullyConnected::evalQuantized() const
getTensorShape(output()), getTensorData<uint8_t>(output()));
}
+void FullyConnected::evalQuantizedS8() const
+{
+ double real_multiplier = 0.0;
+ int output_shift;
+ int32_t output_activation_min;
+ int32_t output_activation_max;
+ int32_t output_multiplier;
+ real_multiplier =
+ getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale());
+ quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+ calculateActivationRangeQuantized(params().activation, output(), &output_activation_min,
+ &output_activation_max);
+
+ int32_t input_offset = -input()->zero_point();
+ int32_t filter_offset = -weights()->zero_point();
+ int32_t output_offset = output()->zero_point();
+
+ tflite::FullyConnectedParams op_params{};
+ op_params.input_offset = input_offset;
+ op_params.weights_offset = filter_offset;
+ op_params.output_offset = output_offset;
+ op_params.output_multiplier = output_multiplier;
+ op_params.output_shift = output_shift;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ op_params.lhs_cacheable = false;
+ op_params.rhs_cacheable = false;
+ tflite::reference_integer_ops::FullyConnected(
+ op_params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(weights()),
+ getTensorData<int8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+ getTensorShape(output()), getTensorData<int8_t>(output()));
+}
+
} // namespace kernels
} // namespace luci_interpreter
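
evalQuantizedS8 folds the input, weight, and output scales into one fixed-point multiplier plus shift via quantizeMultiplier before handing off to the reference integer kernel. A standalone sketch of that standard decomposition (illustrative; the in-tree kernels/Utils.h helper may treat edge cases differently), worked for scales 0.5 * 0.25 / 1.0:

#include <cmath>
#include <cstdint>
#include <cstdio>

// Sketch: real_multiplier = (input_scale * weight_scale) / output_scale is
// decomposed into a Q31 multiplier and a power-of-two shift.
void quantizeMultiplierSketch(double real_multiplier, int32_t *quantized_multiplier, int *shift)
{
  if (real_multiplier == 0.0)
  {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }
  const double q = std::frexp(real_multiplier, shift); // real_multiplier = q * 2^shift, q in [0.5, 1)
  auto q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
  if (q_fixed == (1LL << 31)) // rounding pushed q up to exactly 1.0
  {
    q_fixed /= 2;
    ++*shift;
  }
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}

int main()
{
  const double real_multiplier = 0.5 * 0.25 / 1.0; // input * weight / output scale
  int32_t multiplier = 0;
  int shift = 0;
  quantizeMultiplierSketch(real_multiplier, &multiplier, &shift);
  std::printf("multiplier=%d shift=%d\n", multiplier, shift); // 2^30 and -2
  return 0;
}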
diff --git a/compiler/luci-interpreter/src/kernels/FullyConnected.h b/compiler/luci-interpreter/src/kernels/FullyConnected.h
index 204f11ebb..2a7c068c0 100644
--- a/compiler/luci-interpreter/src/kernels/FullyConnected.h
+++ b/compiler/luci-interpreter/src/kernels/FullyConnected.h
@@ -42,6 +42,7 @@ public:
private:
void evalFloat() const;
void evalQuantized() const;
+ void evalQuantizedS8() const;
};
} // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp b/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp
index 0259d3e1d..b0eda0145 100644
--- a/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/FullyConnected.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -32,9 +33,13 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor weights_tensor = makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data);
- Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor weights_tensor =
+ makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
FullyConnectedParams params{};
@@ -42,6 +47,7 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
@@ -49,21 +55,63 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
}
template <>
+void Check<int8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> weights_shape,
+ std::initializer_list<int32_t> bias_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<float> weights_data,
+ std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ const float quantized_tolerance = getTolerance(-127, 128, 255);
+ std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-63.5, 64);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, memory_manager.get());
+ Tensor weights_tensor =
+ makeInputTensor<DataType::S8>(weights_shape, input_quant_param.first, input_quant_param.second,
+ weights_data, memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0,
+ bias_data, memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+ FullyConnectedParams params{};
+ params.activation = Activation::RELU;
+
+ FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, quantized_tolerance));
+}
+
+template <>
void Check<uint8_t>(
std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
const float quantized_tolerance = getTolerance(-127, 128, 255);
std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
- Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first,
- input_quant_param.second, input_data);
- Tensor weights_tensor = makeInputTensor<DataType::U8>(weights_shape, input_quant_param.first,
- input_quant_param.second, weights_data);
- Tensor bias_tensor = makeInputTensor<DataType::S32>(
- bias_shape, input_quant_param.first * input_quant_param.first, 0, bias_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, memory_manager.get());
+ Tensor weights_tensor =
+ makeInputTensor<DataType::U8>(weights_shape, input_quant_param.first, input_quant_param.second,
+ weights_data, memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0,
+ bias_data, memory_manager.get());
Tensor output_tensor =
makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
@@ -72,6 +120,7 @@ void Check<uint8_t>(
FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
@@ -83,7 +132,7 @@ template <typename T> class FullyConnectedTest : public ::testing::Test
{
};
-using DataTypes = ::testing::Types<float, uint8_t>;
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
TYPED_TEST_CASE(FullyConnectedTest, DataTypes);
TYPED_TEST(FullyConnectedTest, Simple)
@@ -121,9 +170,13 @@ TEST(FullyConnectedTest, InvalidBiasType_NEG)
Shape bias_shape{3};
std::vector<int32_t> bias_data{-1, -5, -8};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor weights_tensor = makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data);
- Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data);
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor weights_tensor =
+ makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
FullyConnectedParams params{};
@@ -149,9 +202,14 @@ TEST(FullyConnectedTest, InvalidWeightShapeDim_NEG)
Shape bias_shape{3};
std::vector<float> bias_data{-1, -5, -8};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor weights_tensor = makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data);
- Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor weights_tensor =
+ makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
FullyConnectedParams params{};
@@ -180,9 +238,14 @@ TEST(FullyConnectedTest, BiasElementNumWeightDimMismatch_NEG)
Shape bias_shape{3};
std::vector<float> bias_data{-1, -5, -8};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor weights_tensor = makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data);
- Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor weights_tensor =
+ makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
FullyConnectedParams params{};
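
Extending DataTypes to float, uint8_t, int8_t is what routes the existing TYPED_TEST bodies through the new Check<int8_t> specialization. A generic, self-contained illustration of that gtest mechanism (not the FullyConnected code itself):

#include <cstdint>
#include <gtest/gtest.h>

// Primary template plus one full specialization; the specialization is picked
// whenever the typed test instantiates with int8_t.
template <typename T> void CheckSketch(T) {} // generic path

template <> void CheckSketch<int8_t>(int8_t value)
{
  // int8-specific path, analogous to Check<int8_t> above.
  EXPECT_LE(static_cast<int32_t>(value), 127);
}

template <typename T> class DispatchTest : public ::testing::Test
{
};

using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
TYPED_TEST_CASE(DispatchTest, DataTypes); // newer gtest spells this TYPED_TEST_SUITE

TYPED_TEST(DispatchTest, RoutesToSpecialization)
{
  CheckSketch<TypeParam>(TypeParam(1));
}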
diff --git a/compiler/luci-interpreter/src/kernels/Greater.h b/compiler/luci-interpreter/src/kernels/Greater.h
index a65d29f5c..877c139c9 100644
--- a/compiler/luci-interpreter/src/kernels/Greater.h
+++ b/compiler/luci-interpreter/src/kernels/Greater.h
@@ -42,9 +42,9 @@ private:
private:
int32_t _x_multiplier = 0;
- int32_t _x_shift = 0;
+ int _x_shift = 0;
int32_t _y_multiplier = 0;
- int32_t _y_shift = 0;
+ int _y_shift = 0;
};
} // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/Greater.test.cpp b/compiler/luci-interpreter/src/kernels/Greater.test.cpp
index 3fcc86603..ba3925f17 100644
--- a/compiler/luci-interpreter/src/kernels/Greater.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Greater.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Greater.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,7 +28,15 @@ namespace
using namespace testing;
-TEST(GreaterTest, FloatSimple)
+class GreaterTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(GreaterTest, FloatSimple)
{
std::vector<float> x_data{
0.5, 0.7, 0.9, // Row 1
@@ -44,19 +53,20 @@ TEST(GreaterTest, FloatSimple)
true, false, false, // Row 2
};
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
- Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Greater kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
}
-TEST(GreaterTest, FloatBroardcast)
+TEST_F(GreaterTest, FloatBroardcast)
{
std::vector<float> x_data{
0.5, 0.7, 0.9, // Row 1
@@ -74,12 +84,13 @@ TEST(GreaterTest, FloatBroardcast)
false, false, true, // Row 3
};
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data);
- Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Greater kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
@@ -90,7 +101,7 @@ TEST(GreaterTest, FloatBroardcast)
const float F_MIN = -128.0 / 128.0;
const float F_MAX = 127.0 / 128.0;
-TEST(GreaterTest, Uint8Quantized)
+TEST_F(GreaterTest, Uint8Quantized)
{
std::vector<float> x_data{
0.5, 0.6, 0.7, 0.9, // Row 1
@@ -108,21 +119,22 @@ TEST(GreaterTest, Uint8Quantized)
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
- Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
- Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Greater kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
}
-TEST(GreaterTest, Uint8QuantizedRescale)
+TEST_F(GreaterTest, Uint8QuantizedRescale)
{
std::vector<float> x_data{
0.5, 0.6, 0.7, 0.9, // Row 1
@@ -142,21 +154,22 @@ TEST(GreaterTest, Uint8QuantizedRescale)
std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 3);
- Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
- Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Greater kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
}
-TEST(GreaterTest, Uint8QuantizedBroadcast)
+TEST_F(GreaterTest, Uint8QuantizedBroadcast)
{
std::vector<float> x_data{
0.4, -0.8, 0.7, 0.3, // Row 1
@@ -175,34 +188,35 @@ TEST(GreaterTest, Uint8QuantizedBroadcast)
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
- Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
- Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Greater kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
}
-TEST(GreaterTest, Input_Type_Mismatch_NEG)
+TEST_F(GreaterTest, Input_Type_Mismatch_NEG)
{
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
- Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1});
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Greater kernel(&x_tensor, &y_tensor, &output_tensor);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(GreaterTest, Input_Output_Type_NEG)
+TEST_F(GreaterTest, Input_Output_Type_NEG)
{
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
- Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Greater kernel(&x_tensor, &y_tensor, &output_tensor);
diff --git a/compiler/luci-interpreter/src/kernels/GreaterEqual.h b/compiler/luci-interpreter/src/kernels/GreaterEqual.h
index e948d698f..4a0f48748 100644
--- a/compiler/luci-interpreter/src/kernels/GreaterEqual.h
+++ b/compiler/luci-interpreter/src/kernels/GreaterEqual.h
@@ -42,9 +42,9 @@ private:
private:
int32_t _x_multiplier = 0;
- int32_t _x_shift = 0;
+ int _x_shift = 0;
int32_t _y_multiplier = 0;
- int32_t _y_shift = 0;
+ int _y_shift = 0;
};
} // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp b/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp
index 7c79d8abc..a9d172301 100644
--- a/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/GreaterEqual.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,7 +28,15 @@ namespace
using namespace testing;
-TEST(GreaterEqualTest, FloatSimple)
+class GreaterEqualTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(GreaterEqualTest, FloatSimple)
{
std::vector<float> x_data{
0.5, 0.7, 0.9, // Row 1
@@ -44,19 +53,20 @@ TEST(GreaterEqualTest, FloatSimple)
true, true, false, // Row 2
};
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
- Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
}
-TEST(GreaterEqualTest, FloatBroardcast)
+TEST_F(GreaterEqualTest, FloatBroardcast)
{
std::vector<float> x_data{
0.5, 0.7, 0.9, // Row 1
@@ -74,12 +84,13 @@ TEST(GreaterEqualTest, FloatBroardcast)
false, false, true, // Row 3
};
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data);
- Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
@@ -90,7 +101,7 @@ TEST(GreaterEqualTest, FloatBroardcast)
const float F_MIN = -128.0 / 128.0;
const float F_MAX = 127.0 / 128.0;
-TEST(GreaterEqualTest, Uint8Quantized)
+TEST_F(GreaterEqualTest, Uint8Quantized)
{
std::vector<float> x_data{
0.5, 0.6, 0.7, 0.9, // Row 1
@@ -108,21 +119,22 @@ TEST(GreaterEqualTest, Uint8Quantized)
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
- Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
- Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
}
-TEST(GreaterEqualTest, Uint8QuantizedRescale)
+TEST_F(GreaterEqualTest, Uint8QuantizedRescale)
{
std::vector<float> x_data{
0.5, 0.5, 0.7, 0.9, // Row 1
@@ -142,21 +154,22 @@ TEST(GreaterEqualTest, Uint8QuantizedRescale)
std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
- Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
- Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
}
-TEST(GreaterEqualTest, Uint8QuantizedBroadcast)
+TEST_F(GreaterEqualTest, Uint8QuantizedBroadcast)
{
std::vector<float> x_data{
0.4, -0.8, 0.7, 0.3, // Row 1
@@ -175,34 +188,35 @@ TEST(GreaterEqualTest, Uint8QuantizedBroadcast)
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
- Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
- Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
}
-TEST(GreaterEqualTest, Input_Type_Mismatch_NEG)
+TEST_F(GreaterEqualTest, Input_Type_Mismatch_NEG)
{
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
- Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1});
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(GreaterEqualTest, Input_Output_Type_NEG)
+TEST_F(GreaterEqualTest, Input_Output_Type_NEG)
{
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
- Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
diff --git a/compiler/luci-interpreter/src/kernels/If.cpp b/compiler/luci-interpreter/src/kernels/If.cpp
index a267f6267..971708bca 100644
--- a/compiler/luci-interpreter/src/kernels/If.cpp
+++ b/compiler/luci-interpreter/src/kernels/If.cpp
@@ -68,6 +68,8 @@ void If::execute() const
const int32_t num_elements = input(i)->shape().num_elements();
const std::size_t element_size = getDataTypeSize(input(i)->element_type());
+ // TODO: Think about how to allocate memory for the output in the main graph
+ active_graph->configureAllocations(graph_inputs[i]);
std::memcpy(graph_inputs[i]->data<void>(), input(i)->data<void>(), num_elements * element_size);
}
@@ -78,6 +80,8 @@ void If::execute() const
{
LUCI_INTERPRETER_CHECK(graph_outputs[i]->element_type() == output(i)->element_type());
output(i)->resize(graph_outputs[i]->shape());
+ // TODO: Think about how to allocate memory for the output in the main graph
+ active_graph->configureAllocations(output(i));
const int32_t num_elements = output(i)->shape().num_elements();
const std::size_t element_size = getDataTypeSize(output(i)->element_type());
diff --git a/compiler/luci-interpreter/src/kernels/If.test.cpp b/compiler/luci-interpreter/src/kernels/If.test.cpp
index 0dba310d9..c5f4faf75 100644
--- a/compiler/luci-interpreter/src/kernels/If.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/If.test.cpp
@@ -21,6 +21,8 @@
#include "kernels/Mul.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
namespace luci_interpreter
{
namespace kernels
@@ -30,9 +32,17 @@ namespace
using namespace testing;
-RuntimeGraph *buildAddSubgraph(RuntimeModule *module)
+class IfTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+RuntimeGraph *buildAddSubgraph(RuntimeModule *module, IMemoryManager *memory_manager)
{
- RuntimeGraph *graph = module->addGraph();
+ RuntimeGraph *graph = module->addGraph(memory_manager);
Tensor *input1 = graph->addTensor(
std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
Tensor *input2 = graph->addTensor(
@@ -40,6 +50,10 @@ RuntimeGraph *buildAddSubgraph(RuntimeModule *module)
Tensor *output = graph->addTensor(
std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
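+ // Back the subgraph tensors with memory up front; the If kernel copies data into them in execute()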
+ memory_manager->allocate_memory(*input1);
+ memory_manager->allocate_memory(*input2);
+ memory_manager->allocate_memory(*output);
+
graph->setInputTensors({input1, input2});
graph->setOutputTensors({output});
@@ -50,9 +64,9 @@ RuntimeGraph *buildAddSubgraph(RuntimeModule *module)
return graph;
}
-RuntimeGraph *buildMulSubgraph(RuntimeModule *module)
+RuntimeGraph *buildMulSubgraph(RuntimeModule *module, IMemoryManager *memory_manager)
{
- RuntimeGraph *graph = module->addGraph();
+ RuntimeGraph *graph = module->addGraph(memory_manager);
Tensor *input1 = graph->addTensor(
std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
Tensor *input2 = graph->addTensor(
@@ -60,6 +74,10 @@ RuntimeGraph *buildMulSubgraph(RuntimeModule *module)
Tensor *output = graph->addTensor(
std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
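+ // Allocate the subgraph tensors here as well; see buildAddSubgraph above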
+ memory_manager->allocate_memory(*input1);
+ memory_manager->allocate_memory(*input2);
+ memory_manager->allocate_memory(*output);
+
graph->setInputTensors({input1, input2});
graph->setOutputTensors({output});
@@ -70,67 +88,69 @@ RuntimeGraph *buildMulSubgraph(RuntimeModule *module)
return graph;
}
-TEST(IfTest, CondTrue)
+TEST_F(IfTest, CondTrue)
{
- Tensor cond = makeInputTensor<DataType::BOOL>({1}, {true});
- Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7});
- Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2});
+ Tensor cond = makeInputTensor<DataType::BOOL>({1}, {true}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
Tensor output = makeOutputTensor(DataType::FLOAT32);
RuntimeModule module(nullptr);
- RuntimeGraph *then_graph = buildAddSubgraph(&module);
- RuntimeGraph *else_graph = buildMulSubgraph(&module);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
kernel.configure();
+ _memory_manager->allocate_memory(output);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({6, 9}));
}
-TEST(IfTest, CondFalse)
+TEST_F(IfTest, CondFalse)
{
- Tensor cond = makeInputTensor<DataType::BOOL>({1}, {false});
- Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7});
- Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2});
+ Tensor cond = makeInputTensor<DataType::BOOL>({1}, {false}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
Tensor output = makeOutputTensor(DataType::FLOAT32);
RuntimeModule module(nullptr);
- RuntimeGraph *then_graph = buildAddSubgraph(&module);
- RuntimeGraph *else_graph = buildMulSubgraph(&module);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
kernel.configure();
+ _memory_manager->allocate_memory(output);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({5, 14}));
}
-TEST(IfTest, InvalidCondType_NEG)
+TEST_F(IfTest, InvalidCondType_NEG)
{
- Tensor cond = makeInputTensor<DataType::FLOAT32>({1}, {1});
- Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7});
- Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2});
+ Tensor cond = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
Tensor output = makeOutputTensor(DataType::FLOAT32);
RuntimeModule module(nullptr);
- RuntimeGraph *then_graph = buildAddSubgraph(&module);
- RuntimeGraph *else_graph = buildMulSubgraph(&module);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(IfTest, InvalidCondElementNum_NEG)
+TEST_F(IfTest, InvalidCondElementNum_NEG)
{
- Tensor cond = makeInputTensor<DataType::BOOL>({2}, {false, true});
- Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7});
- Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2});
+ Tensor cond = makeInputTensor<DataType::BOOL>({2}, {false, true}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
Tensor output = makeOutputTensor(DataType::FLOAT32);
RuntimeModule module(nullptr);
- RuntimeGraph *then_graph = buildAddSubgraph(&module);
- RuntimeGraph *else_graph = buildMulSubgraph(&module);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
EXPECT_ANY_THROW(kernel.configure());
diff --git a/compiler/luci-interpreter/src/kernels/InstanceNorm.test.cpp b/compiler/luci-interpreter/src/kernels/InstanceNorm.test.cpp
index 1d4ccb4cd..04400c3c0 100644
--- a/compiler/luci-interpreter/src/kernels/InstanceNorm.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/InstanceNorm.test.cpp
@@ -15,6 +15,7 @@
*/
#include "kernels/InstanceNorm.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -24,11 +25,21 @@ namespace
{
using namespace testing;
-TEST(InstanceNormTest, Simple)
+
+class InstanceNormTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(InstanceNormTest, Simple)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 2, 2, 1}, {1, 1, 1, 1});
- Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1});
- Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2});
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 2, 1}, {1, 1, 1, 1}, _memory_manager.get());
+ Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+ Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
InstanceNormParams params{};
@@ -37,17 +48,19 @@ TEST(InstanceNormTest, Simple)
InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({2, 2, 2, 2}));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1}));
}
-TEST(InstanceNormTest, Single_gamma_beta)
+TEST_F(InstanceNormTest, Single_gamma_beta)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1});
- Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1});
- Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2});
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1}, _memory_manager.get());
+ Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+ Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
InstanceNormParams params{};
@@ -56,17 +69,19 @@ TEST(InstanceNormTest, Single_gamma_beta)
InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({2, 2, 2, 2}));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 2}));
}
-TEST(InstanceNormTest, Wrong_gamma_beta_dim_NEG)
+TEST_F(InstanceNormTest, Wrong_gamma_beta_dim_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1});
- Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1, 1, 1});
- Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({3}, {2, 2, 2});
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1}, _memory_manager.get());
+ Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1, 1, 1}, _memory_manager.get());
+ Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({3}, {2, 2, 2}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
InstanceNormParams params{};
diff --git a/compiler/luci-interpreter/src/kernels/L2Normalize.cpp b/compiler/luci-interpreter/src/kernels/L2Normalize.cpp
index 2eaf5404e..64222953f 100644
--- a/compiler/luci-interpreter/src/kernels/L2Normalize.cpp
+++ b/compiler/luci-interpreter/src/kernels/L2Normalize.cpp
@@ -17,7 +17,7 @@
#include "kernels/L2Normalize.h"
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALL2Normalize.h"
#include <stdexcept>
@@ -66,9 +66,9 @@ template <typename T> void L2Normalize::eval(int32_t zero_point) const
{
tflite::L2NormalizationParams op_params{};
op_params.input_zero_point = zero_point;
- tflite::optimized_ops::L2Normalization(op_params, getTensorShape(input()),
- getTensorData<T>(input()), getTensorShape(output()),
- getTensorData<T>(output()));
+ luci_interpreter_pal::L2Normalization(op_params, getTensorShape(input()),
+ getTensorData<T>(input()), getTensorShape(output()),
+ getTensorData<T>(output()));
}
} // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp b/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp
index 6281b451b..1e565e358 100644
--- a/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp
@@ -16,6 +16,7 @@
*/
#include "kernels/L2Normalize.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -30,7 +31,9 @@ template <typename T>
void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
std::initializer_list<float> input_data, std::initializer_list<float> output_data)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
L2NormParams params{};
@@ -38,6 +41,7 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
L2Normalize kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
@@ -50,12 +54,13 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
std::initializer_list<float> input_data,
std::initializer_list<float> output_data)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
std::pair<float, int32_t> quant_param =
quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f,
std::max(input_data) > 0 ? std::max(input_data) : 0.f);
- Tensor input_tensor =
- makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 128., 128);
L2NormParams params{};
@@ -63,6 +68,7 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
L2Normalize kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(dequantizeTensorData(output_tensor),
@@ -85,9 +91,11 @@ TYPED_TEST(L2NormalizeTest, Simple)
TEST(L2NormalizeTest, ActivationType_NEG)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
L2NormParams params{};
@@ -99,9 +107,11 @@ TEST(L2NormalizeTest, ActivationType_NEG)
TEST(L2NormalizeTest, InvalidOutputQuantParam_NEG)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1};
- Tensor input_tensor = makeInputTensor<DataType::U8>({1, 1, 1, 6}, 1. / 64., 127, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 1, 1, 6}, 1. / 64., 127, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 64., 127);
L2NormParams params{};
diff --git a/compiler/luci-interpreter/src/kernels/L2Pool2D.cpp b/compiler/luci-interpreter/src/kernels/L2Pool2D.cpp
index 5bf3ba5a8..5a88808d5 100644
--- a/compiler/luci-interpreter/src/kernels/L2Pool2D.cpp
+++ b/compiler/luci-interpreter/src/kernels/L2Pool2D.cpp
@@ -19,7 +19,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALL2Pool2D.h"
#include <stdexcept>
@@ -75,9 +75,9 @@ void L2Pool2D::execute() const
op_params.padding_values.width = _padding_width;
op_params.float_activation_min = activation_min;
op_params.float_activation_max = activation_max;
- tflite::optimized_ops::L2Pool(op_params, getTensorShape(input()),
- getTensorData<float>(input()), getTensorShape(output()),
- getTensorData<float>(output()));
+ luci_interpreter_pal::L2Pool(op_params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
break;
default:
throw std::runtime_error("Unsupported type.");
diff --git a/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp b/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp
index 52f426a08..289742a50 100644
--- a/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/L2Pool2D.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,14 +28,23 @@ namespace
using namespace testing;
-TEST(L2Pool2DTest, FloatNone)
+class L2Pool2DTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(L2Pool2DTest, FloatNone)
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
0, 6, 2, 4, //
3, 2, 10, 7, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pool2DParams params{};
@@ -47,6 +57,7 @@ TEST(L2Pool2DTest, FloatNone)
L2Pool2D kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{3.5, 6.5};
@@ -54,14 +65,15 @@ TEST(L2Pool2DTest, FloatNone)
// TODO: add a shape check for output_tensor.
}
-TEST(L2Pool2DTest, FloatRelu)
+TEST_F(L2Pool2DTest, FloatRelu)
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
-1, -6, 2, 4, //
-3, -2, 10, 7, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pool2DParams params{};
@@ -74,6 +86,7 @@ TEST(L2Pool2DTest, FloatRelu)
L2Pool2D kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{3.53553, 6.5};
@@ -81,14 +94,15 @@ TEST(L2Pool2DTest, FloatRelu)
// TODO: add a shape check for output_tensor.
}
-TEST(L2Pool2DTest, FloatRelu1)
+TEST_F(L2Pool2DTest, FloatRelu1)
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
-0.1, -0.6, 2, 4, //
-0.3, -0.2, 10, 7, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pool2DParams params{};
@@ -101,6 +115,7 @@ TEST(L2Pool2DTest, FloatRelu1)
L2Pool2D kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{0.353553, 1.0};
@@ -108,14 +123,15 @@ TEST(L2Pool2DTest, FloatRelu1)
// TODO: add a shape check for output_tensor.
}
-TEST(L2Pool2DTest, FloatRelu6)
+TEST_F(L2Pool2DTest, FloatRelu6)
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
-0.1, -0.6, 2, 4, //
-0.3, -0.2, 10, 7, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pool2DParams params{};
@@ -128,6 +144,7 @@ TEST(L2Pool2DTest, FloatRelu6)
L2Pool2D kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{0.353553, 6.0};
@@ -135,14 +152,15 @@ TEST(L2Pool2DTest, FloatRelu6)
// TODO: add a shape check for output_tensor.
}
-TEST(L2Pool2DTest, FloatPaddingSame)
+TEST_F(L2Pool2DTest, FloatPaddingSame)
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
0, 6, 2, 4, //
3, 2, 10, 7, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pool2DParams params{};
@@ -155,6 +173,7 @@ TEST(L2Pool2DTest, FloatPaddingSame)
L2Pool2D kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{3.5, 6.5};
@@ -162,14 +181,15 @@ TEST(L2Pool2DTest, FloatPaddingSame)
// TODO: add a shape check for output_tensor.
}
-TEST(L2Pool2DTest, FloatPaddingSameStride)
+TEST_F(L2Pool2DTest, FloatPaddingSameStride)
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
0, 6, 2, 4, //
3, 2, 10, 7, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pool2DParams params{};
@@ -182,6 +202,7 @@ TEST(L2Pool2DTest, FloatPaddingSameStride)
L2Pool2D kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{3.5, 6.0, 6.5, 5.70088, 2.54951, 7.2111, 8.63134, 7.0};
@@ -189,14 +210,15 @@ TEST(L2Pool2DTest, FloatPaddingSameStride)
// TODO: add a shape check for output_tensor.
}
-TEST(L2Pool2DTest, FloatPaddingValidStride)
+TEST_F(L2Pool2DTest, FloatPaddingValidStride)
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
0, 6, 2, 4, //
3, 2, 10, 7, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pool2DParams params{};
@@ -209,6 +231,7 @@ TEST(L2Pool2DTest, FloatPaddingValidStride)
L2Pool2D kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{3.5, 6.0, 6.5};
@@ -216,14 +239,15 @@ TEST(L2Pool2DTest, FloatPaddingValidStride)
// TODO: add a shape check for output_tensor.
}
-TEST(L2Pool2DTest, InvalidInputShape_NEG)
+TEST_F(L2Pool2DTest, InvalidInputShape_NEG)
{
Shape input_shape{1, 2, 4};
std::vector<float> input_data{
0, 6, 2, 4, //
3, 2, 10, 7, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pool2DParams params{};
@@ -238,14 +262,15 @@ TEST(L2Pool2DTest, InvalidInputShape_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(L2Pool2DTest, InvalidInputOutputType_NEG)
+TEST_F(L2Pool2DTest, InvalidInputOutputType_NEG)
{
Shape input_shape{1, 2, 4};
std::vector<float> input_data{
0, 6, 2, 4, //
3, 2, 10, 7, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8);
Pool2DParams params{};
diff --git a/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp b/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp
index f468da5d3..3833a55e8 100644
--- a/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp
+++ b/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp
@@ -18,8 +18,9 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h>
+
+#include "PALLeakyRelu.h"
#include <stdexcept>
@@ -66,9 +67,8 @@ void LeakyRelu::evalFloat() const
{
tflite::LeakyReluParams op_params{};
op_params.alpha = params().alpha;
- tflite::optimized_ops::LeakyRelu(op_params, getTensorShape(input()),
- getTensorData<float>(input()), getTensorShape(output()),
- getTensorData<float>(output()));
+ luci_interpreter_pal::LeakyRelu(op_params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
}
void LeakyRelu::evalQuantized() const
diff --git a/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp b/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp
index b5cc3e7fc..6ec8a348a 100644
--- a/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/LeakyRelu.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -31,8 +32,10 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
std::initializer_list<float> input_data, std::initializer_list<float> output_data,
float alpha)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
constexpr DataType element_type = getElementType<T>();
- Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(element_type);
LeakyReluParams params{};
@@ -41,6 +44,7 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
LeakyRelu kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
@@ -53,10 +57,11 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
std::initializer_list<float> input_data,
std::initializer_list<float> output_data, float alpha)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
const float quantized_tolerance = getTolerance(-8, 127.f / 16.f, 255);
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-8, 127.f / 16.f);
- Tensor input_tensor =
- makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
LeakyReluParams params{};
@@ -65,6 +70,7 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
LeakyRelu kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
@@ -99,10 +105,13 @@ TYPED_TEST(LeakReluTest, Simple)
TEST(LeakReluTest, IvalidInputOutputType_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, {
- 0.0f, 1.0f, 3.0f, // Row 1
- 1.0f, -1.0f, -2.0f, // Row 2
- });
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3},
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ },
+ memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8);
LeakyReluParams params{};
diff --git a/compiler/luci-interpreter/src/kernels/Less.h b/compiler/luci-interpreter/src/kernels/Less.h
index fe03e10b1..293740e72 100644
--- a/compiler/luci-interpreter/src/kernels/Less.h
+++ b/compiler/luci-interpreter/src/kernels/Less.h
@@ -42,9 +42,9 @@ private:
private:
int32_t _x_multiplier = 0;
- int32_t _x_shift = 0;
+ int _x_shift = 0;
int32_t _y_multiplier = 0;
- int32_t _y_shift = 0;
+ int _y_shift = 0;
};
} // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/Less.test.cpp b/compiler/luci-interpreter/src/kernels/Less.test.cpp
index 2972bd559..e9d09b288 100644
--- a/compiler/luci-interpreter/src/kernels/Less.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Less.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Less.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,7 +28,15 @@ namespace
using namespace testing;
-TEST(LessTest, FloatSimple)
+class LessTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LessTest, FloatSimple)
{
std::vector<float> x_data{
0.5, 0.7, 0.9, // Row 1
@@ -44,19 +53,20 @@ TEST(LessTest, FloatSimple)
false, false, true, // Row 2
};
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
- Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Less kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
}
-TEST(LessTest, FloatBroardcast)
+TEST_F(LessTest, FloatBroardcast)
{
std::vector<float> x_data{
0.5, 0.7, 0.9, // Row 1
@@ -74,12 +84,13 @@ TEST(LessTest, FloatBroardcast)
true, true, false, // Row 3
};
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data);
- Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Less kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
@@ -90,7 +101,7 @@ TEST(LessTest, FloatBroardcast)
const float F_MIN = -128.0 / 128.0;
const float F_MAX = 127.0 / 128.0;
-TEST(LessTest, Uint8Quantized)
+TEST_F(LessTest, Uint8Quantized)
{
std::vector<float> x_data{
0.5, 0.6, 0.7, 0.9, // Row 1
@@ -108,21 +119,22 @@ TEST(LessTest, Uint8Quantized)
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
- Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
- Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Less kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
}
-TEST(LessTest, Uint8QuantizedRescale)
+TEST_F(LessTest, Uint8QuantizedRescale)
{
std::vector<float> x_data{
0.5, 0.6, 0.7, 0.9, // Row 1
@@ -142,21 +154,22 @@ TEST(LessTest, Uint8QuantizedRescale)
std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
- Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
- Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Less kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
}
-TEST(LessTest, Uint8QuantizedBroadcast)
+TEST_F(LessTest, Uint8QuantizedBroadcast)
{
std::vector<float> x_data{
0.4, -0.8, 0.7, 0.3, // Row 1
@@ -175,34 +188,35 @@ TEST(LessTest, Uint8QuantizedBroadcast)
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
- Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
- Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Less kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
}
-TEST(LessTest, Input_Type_Mismatch_NEG)
+TEST_F(LessTest, Input_Type_Mismatch_NEG)
{
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
- Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1});
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Less kernel(&x_tensor, &y_tensor, &output_tensor);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(LessTest, Input_Output_Type_NEG)
+TEST_F(LessTest, Input_Output_Type_NEG)
{
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
- Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Less kernel(&x_tensor, &y_tensor, &output_tensor);
diff --git a/compiler/luci-interpreter/src/kernels/LessEqual.h b/compiler/luci-interpreter/src/kernels/LessEqual.h
index ed4b0f1ea..b6da1a2a8 100644
--- a/compiler/luci-interpreter/src/kernels/LessEqual.h
+++ b/compiler/luci-interpreter/src/kernels/LessEqual.h
@@ -42,9 +42,9 @@ private:
private:
int32_t _x_multiplier = 0;
- int32_t _x_shift = 0;
+ int _x_shift = 0;
int32_t _y_multiplier = 0;
- int32_t _y_shift = 0;
+ int _y_shift = 0;
};
} // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp b/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp
index db65815a6..0558003dd 100644
--- a/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/LessEqual.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,7 +28,15 @@ namespace
using namespace testing;
-TEST(LessEqualTest, FloatSimple)
+class LessEqualTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LessEqualTest, FloatSimple)
{
std::vector<float> x_data{
0.5, 0.7, 0.9, // Row 1
@@ -44,19 +53,20 @@ TEST(LessEqualTest, FloatSimple)
false, true, true, // Row 2
};
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
- Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
}
-TEST(LessEqualTest, FloatBroardcast)
+TEST_F(LessEqualTest, FloatBroardcast)
{
std::vector<float> x_data{
0.5, 0.7, 0.9, // Row 1
@@ -74,12 +84,13 @@ TEST(LessEqualTest, FloatBroardcast)
true, true, false, // Row 3
};
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data);
- Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
@@ -90,7 +101,7 @@ TEST(LessEqualTest, FloatBroardcast)
const float F_MIN = -128.0 / 128.0;
const float F_MAX = 127.0 / 128.0;
-TEST(LessEqualTest, Uint8Quantized)
+TEST_F(LessEqualTest, Uint8Quantized)
{
std::vector<float> x_data{
0.5, 0.6, 0.7, 0.9, // Row 1
@@ -108,21 +119,22 @@ TEST(LessEqualTest, Uint8Quantized)
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
- Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
- Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
}
-TEST(LessEqualTest, Uint8QuantizedRescale)
+TEST_F(LessEqualTest, Uint8QuantizedRescale)
{
std::vector<float> x_data{
0.5, 0.6, 0.7, 0.9, // Row 1
@@ -142,21 +154,22 @@ TEST(LessEqualTest, Uint8QuantizedRescale)
std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
- Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
- Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
}
-TEST(LessEqualTest, Uint8QuantizedBroadcast)
+TEST_F(LessEqualTest, Uint8QuantizedBroadcast)
{
std::vector<float> x_data{
0.4, -0.8, 0.7, 0.3, // Row 1
@@ -175,34 +188,35 @@ TEST(LessEqualTest, Uint8QuantizedBroadcast)
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
- Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
- Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
}
-TEST(LessEqualTest, Input_Type_Mismatch_NEG)
+TEST_F(LessEqualTest, Input_Type_Mismatch_NEG)
{
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
- Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1});
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(LessEqualTest, Input_Output_Type_NEG)
+TEST_F(LessEqualTest, Input_Output_Type_NEG)
{
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
- Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
diff --git a/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp
index fd2ec41a1..a2bf442b0 100644
--- a/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp
+++ b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp
@@ -18,7 +18,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALLocalResponseNormalization.h"
#include <stdexcept>
@@ -52,7 +52,7 @@ void LocalResponseNormalization::execute() const
op_params.bias = params().bias;
op_params.alpha = params().alpha;
op_params.beta = params().beta;
- tflite::optimized_ops::LocalResponseNormalization(
+ luci_interpreter_pal::LocalResponseNormalization(
op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(output()),
getTensorData<float>(output()));
break;
diff --git a/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp
index 6a4331d34..4a9d4739f 100644
--- a/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/LocalResponseNormalization.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,10 +28,18 @@ namespace
using namespace testing;
-TEST(LocalResponseNormalizationTest, SameAsL2Norm)
+class LocalResponseNormalizationTest : public ::testing::Test
{
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LocalResponseNormalizationTest, SameAsL2Norm)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
LocalResponseNormalizationParams params{};
@@ -41,16 +50,17 @@ TEST(LocalResponseNormalizationTest, SameAsL2Norm)
LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor),
FloatArrayNear({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05}));
}
-TEST(LocalResponseNormalizationTest, WithAlpha)
+TEST_F(LocalResponseNormalizationTest, WithAlpha)
{
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
LocalResponseNormalizationParams params{};
@@ -61,16 +71,17 @@ TEST(LocalResponseNormalizationTest, WithAlpha)
LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor),
FloatArrayNear({-0.275, 0.15, 0.175, 0.3, -0.175, 0.025}));
}
-TEST(LocalResponseNormalizationTest, WithBias)
+TEST_F(LocalResponseNormalizationTest, WithBias)
{
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
LocalResponseNormalizationParams params{};
@@ -81,16 +92,17 @@ TEST(LocalResponseNormalizationTest, WithBias)
LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor),
FloatArrayNear({-0.22, 0.12, 0.14, 0.24, -0.14, 0.02}));
}
-TEST(LocalResponseNormalizationTest, SmallRadius)
+TEST_F(LocalResponseNormalizationTest, SmallRadius)
{
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
LocalResponseNormalizationParams params{};
@@ -101,16 +113,17 @@ TEST(LocalResponseNormalizationTest, SmallRadius)
LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor),
FloatArrayNear({-0.264926, 0.125109, 0.140112, 0.267261, -0.161788, 0.0244266}));
}
-TEST(LocalResponseNormalizationTest, InvalidInputDimension_NEG)
+TEST_F(LocalResponseNormalizationTest, InvalidInputDimension_NEG)
{
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
LocalResponseNormalizationParams params{};
@@ -123,10 +136,10 @@ TEST(LocalResponseNormalizationTest, InvalidInputDimension_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(LocalResponseNormalizationTest, InvalidInputOutputType_NEG)
+TEST_F(LocalResponseNormalizationTest, InvalidInputOutputType_NEG)
{
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8);
LocalResponseNormalizationParams params{};
diff --git a/compiler/luci-interpreter/src/kernels/LogSoftmax.cpp b/compiler/luci-interpreter/src/kernels/LogSoftmax.cpp
index 03d13e4ce..79c315338 100644
--- a/compiler/luci-interpreter/src/kernels/LogSoftmax.cpp
+++ b/compiler/luci-interpreter/src/kernels/LogSoftmax.cpp
@@ -18,9 +18,9 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/log_softmax.h>
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALLogSoftmax.h"
namespace luci_interpreter
{
@@ -41,8 +41,7 @@ void LogSoftmax::configure()
params.table = _table;
params.beta = 1.0;
-
- tflite::optimized_ops::PopulateSoftmaxLookupTable(&params, input()->scale(), params.beta);
+ luci_interpreter_pal::PopulateSoftmaxLookupTable(&params, input()->scale(), params.beta);
}
output()->resize(input()->shape());
}
@@ -76,6 +75,7 @@ void LogSoftmax::evalQuantized() const
const auto input_scale = input()->scale();
uint8_t *output_data = getTensorData<uint8_t>(output());
const uint8_t *input_data = getTensorData<uint8_t>(input());
+ const float beta = 1.0;
tflite::SoftmaxParams params{};
@@ -83,8 +83,9 @@ void LogSoftmax::evalQuantized() const
params.zero_point = output()->zero_point();
params.scale = output()->scale();
- tflite::optimized_ops::LogSoftmax(params, input_scale, input_shape, input_data, output_shape,
- output_data);
+ luci_interpreter_pal::InitializeParams(&params, input_scale, beta);
+ luci_interpreter_pal::LogSoftmax(params, input_scale, input_shape, input_data, output_shape,
+ output_data);
}
} // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/LogSoftmax.test.cpp b/compiler/luci-interpreter/src/kernels/LogSoftmax.test.cpp
index 8a90c1dd0..50dcd5c28 100644
--- a/compiler/luci-interpreter/src/kernels/LogSoftmax.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/LogSoftmax.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/LogSoftmax.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,18 +28,28 @@ namespace
using namespace testing;
-TEST(LogSoftmaxTest, Float)
+class LogSoftmaxTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogSoftmaxTest, Float)
{
Shape input_shape{2, 4};
std::vector<float> input_data{
0, -6, 2, 4, //
3, -2, 10, 1, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
LogSoftmax kernel(&input_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{
@@ -48,7 +59,7 @@ TEST(LogSoftmaxTest, Float)
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
}
-TEST(LogSoftmaxTest, Uint8)
+TEST_F(LogSoftmaxTest, Uint8)
{
float kMin = -10;
float kMax = 10;
@@ -58,12 +69,13 @@ TEST(LogSoftmaxTest, Uint8)
0, -6, 2, 4, //
3, -2, 10, 1, //
};
- Tensor input_tensor =
- makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second, input_data);
+ Tensor input_tensor = makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second,
+ input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255);
LogSoftmax kernel(&input_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{
@@ -78,28 +90,29 @@ TEST(LogSoftmaxTest, Uint8)
::testing::ElementsAreArray({189, 93, 221, 253, 142, 63, 255, 111}));
}
-TEST(LogSoftmaxTest, InvalidInputOutputType_NEG)
+TEST_F(LogSoftmaxTest, InvalidInputOutputType_NEG)
{
std::vector<float> input_data{
0, -6, 2, 4, //
3, -2, 10, 1, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 4}, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 4}, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255);
LogSoftmax kernel(&input_tensor, &output_tensor);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(LogSoftmaxTest, InvalidOutputQuantParam_NEG)
+TEST_F(LogSoftmaxTest, InvalidOutputQuantParam_NEG)
{
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-10, 10);
std::vector<float> input_data{
0, -6, 2, 4, //
3, -2, 10, 1, //
};
- Tensor input_tensor =
- makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second, input_data);
+ Tensor input_tensor = makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second,
+ input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, 20. / 256, 255);
LogSoftmax kernel(&input_tensor, &output_tensor);
diff --git a/compiler/luci-interpreter/src/kernels/LogicalAnd.test.cpp b/compiler/luci-interpreter/src/kernels/LogicalAnd.test.cpp
index 564f191d5..21b7951e0 100644
--- a/compiler/luci-interpreter/src/kernels/LogicalAnd.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/LogicalAnd.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/LogicalAnd.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,15 +28,26 @@ namespace
using namespace testing;
-TEST(LogicalAndTest, Basic)
+class LogicalAndTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogicalAndTest, Basic)
{
Shape input_shape{1, 1, 1, 4};
- Tensor input_tensor1 = makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true});
- Tensor input_tensor2 = makeInputTensor<DataType::BOOL>(input_shape, {true, false, true, false});
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true}, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::BOOL>(input_shape, {true, false, true, false}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
LogicalAnd kernel(&input_tensor1, &input_tensor2, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<bool>(output_tensor),
@@ -43,14 +55,17 @@ TEST(LogicalAndTest, Basic)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
}
-TEST(LogicalAndTest, Broadcast)
+TEST_F(LogicalAndTest, Broadcast)
{
- Tensor input_tensor1 = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true});
- Tensor input_tensor2 = makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {true});
+ Tensor input_tensor1 = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+ _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {true}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
LogicalAnd kernel(&input_tensor1, &input_tensor2, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<bool>(output_tensor),
@@ -58,20 +73,23 @@ TEST(LogicalAndTest, Broadcast)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
}
-TEST(LogicalAndTest, MismatchInputType_NEG)
+TEST_F(LogicalAndTest, MismatchInputType_NEG)
{
- Tensor input1_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1});
- Tensor input2_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false});
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S32);
LogicalAnd kernel(&input1_tensor, &input2_tensor, &output_tensor);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(LogicalAndTest, InputTypeInvalid_NEG)
+TEST_F(LogicalAndTest, InputTypeInvalid_NEG)
{
- Tensor input1_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1});
- Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0});
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
LogicalAnd kernel(&input1_tensor, &input2_tensor, &output_tensor);
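The pattern repeated across these test files: each TEST becomes a TEST_F bound to a small fixture that owns a TestMemoryManager, every input tensor is built through makeInputTensor with that manager, and the output buffer is allocated explicitly between configure() and execute(). A minimal self-contained sketch of the pattern (illustrative, not taken verbatim from any of the files), assuming only the TestUtils and TestMemoryManager headers included in the hunks above:

    #include "kernels/LogicalNot.h"
    #include "kernels/TestUtils.h"
    #include "luci_interpreter/TestMemoryManager.h"

    #include <gtest/gtest.h>
    #include <memory>

    namespace luci_interpreter
    {
    namespace kernels
    {
    namespace
    {

    using namespace testing;

    // The fixture owns the memory manager used by every tensor in the test body.
    class LogicalNotSketchTest : public ::testing::Test
    {
    protected:
      void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }

      std::unique_ptr<IMemoryManager> _memory_manager;
    };

    TEST_F(LogicalNotSketchTest, Basic)
    {
      // Inputs are created, filled and allocated through the manager.
      Tensor input = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
                                                     _memory_manager.get());
      Tensor output = makeOutputTensor(DataType::BOOL);

      LogicalNot kernel(&input, &output);
      kernel.configure();
      // The output buffer is allocated only after configure() has resolved its shape.
      _memory_manager->allocate_memory(output);
      kernel.execute();

      EXPECT_THAT(extractTensorData<bool>(output), ElementsAre(false, true, true, false));
    }

    } // namespace
    } // namespace kernels
    } // namespace luci_interpreter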
diff --git a/compiler/luci-interpreter/src/kernels/LogicalNot.test.cpp b/compiler/luci-interpreter/src/kernels/LogicalNot.test.cpp
index dccb81102..3cbf27f6b 100644
--- a/compiler/luci-interpreter/src/kernels/LogicalNot.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/LogicalNot.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/LogicalNot.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,14 +28,24 @@ namespace
using namespace testing;
-TEST(LogicalNotTest, Basic)
+class LogicalNotTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogicalNotTest, Basic)
{
Shape input_shape{1, 1, 1, 4};
- Tensor input_tensor = makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true});
+ Tensor input_tensor =
+ makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
LogicalNot kernel(&input_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<bool>(output_tensor),
@@ -42,18 +53,20 @@ TEST(LogicalNotTest, Basic)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
}
-TEST(LogicalNotTest, OutputTypeInvalid_NEG)
+TEST_F(LogicalNotTest, OutputTypeInvalid_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true});
+ Tensor input_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+ _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S32);
LogicalNot kernel(&input_tensor, &output_tensor);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(LogicalNotTest, InputTypeInvalid_NEG)
+TEST_F(LogicalNotTest, InputTypeInvalid_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1});
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
LogicalNot kernel(&input_tensor, &output_tensor);
diff --git a/compiler/luci-interpreter/src/kernels/LogicalOr.cpp b/compiler/luci-interpreter/src/kernels/LogicalOr.cpp
index 7027a2a8b..f289ca64f 100644
--- a/compiler/luci-interpreter/src/kernels/LogicalOr.cpp
+++ b/compiler/luci-interpreter/src/kernels/LogicalOr.cpp
@@ -20,8 +20,6 @@
#include "kernels/Utils.h"
#include "kernels/BinaryOpCommon.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
-
namespace luci_interpreter
{
namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/LogicalOr.test.cpp b/compiler/luci-interpreter/src/kernels/LogicalOr.test.cpp
index 677eac96a..d65a69a5e 100644
--- a/compiler/luci-interpreter/src/kernels/LogicalOr.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/LogicalOr.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/LogicalOr.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,15 +28,26 @@ namespace
using namespace testing;
-TEST(LogicalOrTest, Basic)
+class LogicalOrTest : public ::testing::Test
{
- Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true});
- Tensor input2_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, true, false});
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogicalOrTest, Basic)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+ _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, true, false},
+ _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<bool>(output_tensor),
@@ -43,15 +55,18 @@ TEST(LogicalOrTest, Basic)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
}
-TEST(LogicalOrTest, Broadcast)
+TEST_F(LogicalOrTest, Broadcast)
{
- Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true});
- Tensor input2_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false});
+ Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+ _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<bool>(output_tensor),
@@ -59,10 +74,12 @@ TEST(LogicalOrTest, Broadcast)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
}
-TEST(LogicalOrTest, MismatchInputType_NEG)
+TEST_F(LogicalOrTest, MismatchInputType_NEG)
{
- Tensor input1_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1});
- Tensor input2_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false});
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S32);
@@ -70,10 +87,11 @@ TEST(LogicalOrTest, MismatchInputType_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(LogicalOrTest, InputTypeInvalid_NEG)
+TEST_F(LogicalOrTest, InputTypeInvalid_NEG)
{
- Tensor input1_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1});
- Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0});
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
diff --git a/compiler/luci-interpreter/src/kernels/Logistic.cpp b/compiler/luci-interpreter/src/kernels/Logistic.cpp
index 97d7bf13d..58e4f185d 100644
--- a/compiler/luci-interpreter/src/kernels/Logistic.cpp
+++ b/compiler/luci-interpreter/src/kernels/Logistic.cpp
@@ -18,7 +18,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/logistic.h>
namespace luci_interpreter
{
diff --git a/compiler/luci-interpreter/src/kernels/Logistic.test.cpp b/compiler/luci-interpreter/src/kernels/Logistic.test.cpp
index 41369a417..70227563f 100644
--- a/compiler/luci-interpreter/src/kernels/Logistic.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Logistic.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Logistic.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -30,11 +31,15 @@ template <typename T>
void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
std::initializer_list<float> input_data, std::initializer_list<float> output_data)
{
- Tensor input_tensor = makeInputTensor<getElementType<T>()>(input_shape, input_data);
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<getElementType<T>()>(input_shape, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(getElementType<T>());
Logistic kernel(&input_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
@@ -47,14 +52,18 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
std::initializer_list<float> input_data,
std::initializer_list<float> output_data)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
std::pair<float, int32_t> input_quant_param =
quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
- Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first,
- input_quant_param.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0);
Logistic kernel(&input_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(dequantizeTensorData(output_tensor),
@@ -107,9 +116,12 @@ TYPED_TEST(LogisticTest, Simple)
TEST(LogisticTest, IvalidInputOutputType_NEG)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
Shape input_shape = {1};
std::vector<float> input_data{10};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0);
Logistic kernel(&input_tensor, &output_tensor);
@@ -118,11 +130,13 @@ TEST(LogisticTest, IvalidInputOutputType_NEG)
TEST(LogisticTest, IvalidQuantParam_NEG)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
Shape input_shape = {2};
std::vector<float> input_data{-10, 10};
std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-10, 10);
- Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first,
- input_quant_param.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 255, 0);
Logistic kernel(&input_tensor, &output_tensor);
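Where the checks live in shared template helpers (as in Logistic's Check functions above) rather than in TEST_F bodies, the fixture is unavailable, so each helper constructs its own function-local TestMemoryManager and follows the same configure() / allocate_memory() / execute() sequence; the plain TEST cases for the negative paths do the same with a local manager.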
diff --git a/compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp b/compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp
index b9991f7ec..44f2a222f 100644
--- a/compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/MaxPool2D.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -26,7 +27,15 @@ namespace
using namespace testing;
-TEST(MaxPool2DTest, Float)
+class MaxPool2DTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MaxPool2DTest, Float)
{
Shape input_shape{1, 3, 5, 1};
std::vector<float> input_data{
@@ -34,7 +43,8 @@ TEST(MaxPool2DTest, Float)
-7, -6, -5, -4, -3, //
5, 4, 3, 6, 7, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pool2DParams params{};
@@ -47,6 +57,7 @@ TEST(MaxPool2DTest, Float)
MaxPool2D kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{
@@ -58,15 +69,15 @@ TEST(MaxPool2DTest, Float)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
-TEST(MaxPool2DTest, Uint8)
+TEST_F(MaxPool2DTest, Uint8)
{
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375, 15.9375);
std::vector<float> input_data{
0, -6, 12, 4, //
-3, -2, 10, 7, //
};
- Tensor input_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
Pool2DParams params{};
@@ -79,6 +90,7 @@ TEST(MaxPool2DTest, Uint8)
MaxPool2D kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{0.0, 6.0};
@@ -87,7 +99,7 @@ TEST(MaxPool2DTest, Uint8)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
-TEST(MaxPool2DTest, SInt16)
+TEST_F(MaxPool2DTest, SInt16)
{
Shape input_shape{1, 3, 5, 1};
std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
@@ -101,7 +113,8 @@ TEST(MaxPool2DTest, SInt16)
5, 6, //
};
- Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, 0.2, 0, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, 0.2, 0, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0);
Pool2DParams params{};
@@ -114,6 +127,7 @@ TEST(MaxPool2DTest, SInt16)
MaxPool2D kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
diff --git a/compiler/luci-interpreter/src/kernels/Maximum.test.cpp b/compiler/luci-interpreter/src/kernels/Maximum.test.cpp
index 2ddaeaf04..e4a505b03 100644
--- a/compiler/luci-interpreter/src/kernels/Maximum.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Maximum.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Maximum.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,34 +28,48 @@ namespace
using namespace testing;
-TEST(MaximumTest, Float)
+class MaximumTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MaximumTest, Float)
{
Shape input_shape{3, 1, 2};
std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
- Tensor input_tensor1 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data1);
- Tensor input_tensor2 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data2);
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{1.0, 0.0, 1.0, 12.0, -2.0, -1.43};
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
}
-TEST(MaximumTest, Uint8)
+TEST_F(MaximumTest, Uint8)
{
Shape input_shape{3, 1, 2};
std::vector<uint8_t> input_data1{1, 0, 2, 11, 2, 23};
std::vector<uint8_t> input_data2{0, 0, 1, 12, 255, 1};
- Tensor input_tensor1 = makeInputTensor<DataType::U8>(input_shape, input_data1);
- Tensor input_tensor2 = makeInputTensor<DataType::U8>(input_shape, input_data2);
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::U8>(input_shape, input_data1, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::U8>(input_shape, input_data2, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8);
Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<int32_t> ref_output_shape{2, 4};
diff --git a/compiler/luci-interpreter/src/kernels/Mean.cpp b/compiler/luci-interpreter/src/kernels/Mean.cpp
index 421632812..8e65e0d6d 100644
--- a/compiler/luci-interpreter/src/kernels/Mean.cpp
+++ b/compiler/luci-interpreter/src/kernels/Mean.cpp
@@ -19,7 +19,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/reduce.h>
#include <stdexcept>
@@ -28,7 +28,7 @@ namespace luci_interpreter
namespace kernels
{
-static void resolveAxes(const int *axes_data, int num_axes, tflite::MeanParams *params)
+static void resolveAxes(const int32_t *axes_data, int num_axes, tflite::MeanParams *params)
{
params->axis_count = num_axes;
for (int i = 0; i < num_axes; ++i)
@@ -42,7 +42,7 @@ static void resolveAxes(const int *axes_data, int num_axes, tflite::MeanParams *
}
// Returns the number of axes that will be reduced. Removes duplicates.
-static int getAxisReductionCount(const int *axes_data, int num_axes, int input_num_dims)
+static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
{
int reduction_count = num_axes;
for (int i = 0; i < num_axes; ++i)
@@ -63,7 +63,7 @@ static int getAxisReductionCount(const int *axes_data, int num_axes, int input_n
return reduction_count;
}
-static Shape getOutputShape(const Shape &input_shape, const int *axes_data, int num_axes,
+static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes,
bool keep_dims)
{
int input_num_dims = input_shape.num_dims();
@@ -123,8 +123,10 @@ static Shape getOutputShape(const Shape &input_shape, const int *axes_data, int
}
}
-Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, const ReducerParams &params)
- : KernelWithParams<ReducerParams>({input, axes}, {output}, params)
+Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+ Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params)
+ : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes, temp_sum},
+ params)
{
}
@@ -149,17 +151,28 @@ void Mean::configure()
tflite::MeanParams params{};
resolveAxes(axes_data, num_axes, &params);
- const bool need_temporaries = !(
+ _need_temporaries = !(
_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
((params.axis[0] == 1 && params.axis[1] == 2) || (params.axis[0] == 2 && params.axis[1] == 1)));
- if (need_temporaries)
+ if (_need_temporaries)
{
- _temp_index =
- std::make_unique<Tensor>(DataType::S32, Shape(input_num_dims), AffineQuantization{}, "");
- _resolved_axes =
- std::make_unique<Tensor>(DataType::S32, Shape(num_axes), AffineQuantization{}, "");
- _temp_sum = std::make_unique<Tensor>(input()->element_type(), output()->shape(),
- AffineQuantization{}, "");
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+ auto temp_sum = getOutputTensors()[3];
+
+ temp_index->resize(Shape(input_num_dims));
+ resolved_axes->resize(Shape(num_axes));
+ temp_sum->resize(output()->shape());
+ }
+ else
+ {
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+ auto temp_sum = getOutputTensors()[3];
+
+ temp_index->set_allocatable(false);
+ resolved_axes->set_allocatable(false);
+ temp_sum->set_allocatable(false);
}
}
@@ -179,12 +192,6 @@ void Mean::execute() const
default:
throw std::runtime_error("Unsupported type.");
}
- if (!!_temp_index)
- _temp_index->deallocate();
- if (!!_resolved_axes)
- _resolved_axes->deallocate();
- if (!!_temp_sum)
- _temp_sum->deallocate();
}
void Mean::evalFloat() const
@@ -197,6 +204,10 @@ void Mean::evalFloat() const
tflite::MeanParams params{};
resolveAxes(axes_data, num_axes, &params);
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+ auto temp_sum = getOutputTensors()[3];
+
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
((params.axis[0] == 1 && params.axis[1] == 2) ||
@@ -207,12 +218,12 @@ void Mean::evalFloat() const
}
else
{
- tflite::reference_ops::Mean(
- getTensorData<float>(input()), getTensorShape(input()).DimsData(),
- input()->shape().num_dims(), getTensorData<float>(output()),
- getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
- _params.keep_dims, getTensorData<int>(_temp_index.get()),
- getTensorData<int>(_resolved_axes.get()), getTensorData<float>(_temp_sum.get()));
+ tflite::reference_ops::Mean(getTensorData<float>(input()), getTensorShape(input()).DimsData(),
+ input()->shape().num_dims(), getTensorData<float>(output()),
+ getTensorShape(output()).DimsData(), output()->shape().num_dims(),
+ axes_data, num_axes, _params.keep_dims,
+ getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+ getTensorData<float>(temp_sum));
}
}
@@ -226,6 +237,10 @@ void Mean::evalQuantized() const
tflite::MeanParams params{};
resolveAxes(axes_data, num_axes, &params);
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+ auto temp_sum = getOutputTensors()[3];
+
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
((params.axis[0] == 1 && params.axis[1] == 2) ||
@@ -238,12 +253,12 @@ void Mean::evalQuantized() const
}
else if (input()->zero_point() == output()->zero_point() && input()->scale() == output()->scale())
{
- tflite::reference_ops::Mean(
- getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(),
- input()->shape().num_dims(), getTensorData<uint8_t>(output()),
- getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
- _params.keep_dims, getTensorData<int>(_temp_index.get()),
- getTensorData<int>(_resolved_axes.get()), getTensorData<int>(_temp_sum.get()));
+ tflite::reference_ops::Mean(getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(),
+ input()->shape().num_dims(), getTensorData<uint8_t>(output()),
+ getTensorShape(output()).DimsData(), output()->shape().num_dims(),
+ axes_data, num_axes, _params.keep_dims,
+ getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+ getTensorData<int>(temp_sum));
}
else
{
@@ -252,8 +267,8 @@ void Mean::evalQuantized() const
getTensorShape(input()).DimsData(), input()->shape().num_dims(),
getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(),
getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
- _params.keep_dims, getTensorData<int>(_temp_index.get()),
- getTensorData<int>(_resolved_axes.get()), getTensorData<int>(_temp_sum.get()),
+ _params.keep_dims, getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+ getTensorData<int>(temp_sum),
/*compute_sum=*/false);
}
}
diff --git a/compiler/luci-interpreter/src/kernels/Mean.h b/compiler/luci-interpreter/src/kernels/Mean.h
index 1cc046894..ed07ae561 100644
--- a/compiler/luci-interpreter/src/kernels/Mean.h
+++ b/compiler/luci-interpreter/src/kernels/Mean.h
@@ -30,7 +30,8 @@ namespace kernels
class Mean : public KernelWithParams<ReducerParams>
{
public:
- Mean(const Tensor *input, const Tensor *axes, Tensor *output, const ReducerParams &params);
+ Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+ Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params);
const Tensor *input() const { return _inputs[0]; }
const Tensor *axes() const { return _inputs[1]; }
@@ -45,9 +46,7 @@ private:
void evalQuantizedS16() const;
private:
- std::unique_ptr<Tensor> _temp_index;
- std::unique_ptr<Tensor> _resolved_axes;
- std::unique_ptr<Tensor> _temp_sum;
+ bool _need_temporaries = false;
};
} // namespace kernels
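The substance of the Mean change: the scratch tensors (temp_index, resolved_axes, temp_sum) are no longer unique_ptr members created inside configure() and deallocated after execute(); the caller now constructs them, passes them through the constructor, and they are registered as extra outputs so configure() can resize them (or mark them non-allocatable when the specialized 4D path needs no temporaries) and an external memory manager can plan their buffers. A sketch of the caller side, using the names from Mean.test.cpp below (input_tensor, axis_tensor and _memory_manager are assumed to be set up as in those tests):

    // Caller-owned scratch tensors; Mean::configure() resolves their shapes.
    Tensor temp_index(DataType::S32, Shape({}), {}, "");
    Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
    Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
    Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);

    ReducerParams params{};
    params.keep_dims = true;

    Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
                params);
    kernel.configure();

    // Allocation now happens outside the kernel, so the temporaries are planned
    // together with every other buffer instead of being hidden inside the kernel.
    _memory_manager->allocate_memory(temp_index);
    _memory_manager->allocate_memory(resolved_axes);
    _memory_manager->allocate_memory(temp_sum);
    _memory_manager->allocate_memory(output_tensor);
    kernel.execute();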
diff --git a/compiler/luci-interpreter/src/kernels/Mean.test.cpp b/compiler/luci-interpreter/src/kernels/Mean.test.cpp
index fa0ba2169..d2c00935a 100644
--- a/compiler/luci-interpreter/src/kernels/Mean.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Mean.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Mean.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,22 +28,39 @@ namespace
using namespace testing;
-TEST(MeanTest, FloatKeepDims)
+class MeanTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MeanTest, FloatKeepDims)
{
std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
std::vector<int32_t> axis_data{0, 2};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data);
- Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ReducerParams params{};
params.keep_dims = true;
- Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params);
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{10.5, 12.5, 14.5};
@@ -51,22 +69,31 @@ TEST(MeanTest, FloatKeepDims)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
-TEST(MeanTest, FloatKeepDims4DMean)
+TEST_F(MeanTest, FloatKeepDims4DMean)
{
std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
std::vector<int32_t> axis_data{1, 2};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 3, 2}, input_data);
- Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 2, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ReducerParams params{};
params.keep_dims = true;
- Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params);
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{6, 7, 18, 19};
@@ -75,22 +102,31 @@ TEST(MeanTest, FloatKeepDims4DMean)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
-TEST(MeanTest, FloatNotKeepDims)
+TEST_F(MeanTest, FloatNotKeepDims)
{
std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
std::vector<int32_t> axis_data{1, 0, -3, -3};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data);
- Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ReducerParams params{};
params.keep_dims = false;
- Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params);
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{12, 13};
@@ -99,23 +135,31 @@ TEST(MeanTest, FloatNotKeepDims)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
-TEST(MeanTest, Uint8KeepDims)
+TEST_F(MeanTest, Uint8KeepDims)
{
float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255);
std::vector<float> input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
std::vector<int32_t> axis_data{1};
- Tensor input_tensor =
- makeInputTensor<DataType::U8>({3, 2}, quant_param.first, quant_param.second, input_data);
- Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data);
+ Tensor input_tensor = makeInputTensor<DataType::U8>({3, 2}, quant_param.first, quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::U8, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
ReducerParams params{};
params.keep_dims = true;
- Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params);
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{0.3, 0.35, 0.55};
@@ -125,23 +169,31 @@ TEST(MeanTest, Uint8KeepDims)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
-TEST(MeanTest, Uint8NotKeepDims)
+TEST_F(MeanTest, Uint8NotKeepDims)
{
float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255);
std::vector<float> input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
std::vector<int32_t> axis_data{1};
- Tensor input_tensor =
- makeInputTensor<DataType::U8>({1, 3, 2}, quant_param.first, quant_param.second, input_data);
- Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 3, 2}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
ReducerParams params{};
params.keep_dims = false;
- Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params);
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{0.4, 0.4};
@@ -151,7 +203,7 @@ TEST(MeanTest, Uint8NotKeepDims)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
-TEST(MeanTest, SInt16KeepDims4D)
+TEST_F(MeanTest, SInt16KeepDims4D)
{
std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
@@ -159,15 +211,24 @@ TEST(MeanTest, SInt16KeepDims4D)
std::vector<int32_t> axes_data{1, 2};
std::vector<float> ref_output_data{6, 7, 18, 19};
- Tensor input_tensor = makeInputTensor<DataType::S16>({2, 2, 3, 2}, 0.25, 0, input_data);
- Tensor axes_tensor = makeInputTensor<DataType::S32>({2}, axes_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({2, 2, 3, 2}, 0.25, 0, input_data, _memory_manager.get());
+ Tensor axes_tensor = makeInputTensor<DataType::S32>({2}, axes_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0);
ReducerParams params{};
params.keep_dims = true;
- Mean kernel(&input_tensor, &axes_tensor, &output_tensor, params);
+ Mean kernel(&input_tensor, &axes_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 1, 2}));
diff --git a/compiler/luci-interpreter/src/kernels/Minimum.test.cpp b/compiler/luci-interpreter/src/kernels/Minimum.test.cpp
index b6420dd9b..9a143643f 100644
--- a/compiler/luci-interpreter/src/kernels/Minimum.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Minimum.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Minimum.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,34 +28,48 @@ namespace
using namespace testing;
-TEST(MinimumTest, Float)
+class MinimumTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MinimumTest, Float)
{
Shape input_shape{3, 1, 2};
std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
- Tensor input_tensor1 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data1);
- Tensor input_tensor2 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data2);
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{-1.0, 0.0, -1.0, 11.0, -3.0, -1.44};
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
}
-TEST(MinimumTest, Uint8)
+TEST_F(MinimumTest, Uint8)
{
Shape input_shape{3, 1, 2};
std::vector<uint8_t> input_data1{1, 0, 2, 11, 2, 23};
std::vector<uint8_t> input_data2{0, 0, 1, 12, 255, 1};
- Tensor input_tensor1 = makeInputTensor<DataType::U8>(input_shape, input_data1);
- Tensor input_tensor2 = makeInputTensor<DataType::U8>(input_shape, input_data2);
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::U8>(input_shape, input_data1, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::U8>(input_shape, input_data2, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8);
Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<int32_t> ref_output_shape{2, 4};
diff --git a/compiler/luci-interpreter/src/kernels/MirrorPad.cpp b/compiler/luci-interpreter/src/kernels/MirrorPad.cpp
index 1139167e0..89049c96c 100644
--- a/compiler/luci-interpreter/src/kernels/MirrorPad.cpp
+++ b/compiler/luci-interpreter/src/kernels/MirrorPad.cpp
@@ -18,7 +18,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/pad.h>
namespace luci_interpreter
{
diff --git a/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp b/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp
new file mode 100644
index 000000000..de9da5051
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// TODO: Add tests for MirrorPad
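The new MirrorPad test file is only a license header and a TODO. Purely as an illustration of where it is headed, a skeleton following the fixture convention the other kernel tests adopt in this commit (the MirrorPad kernel itself is not constructed here because its interface is not shown in this diff):

    #include "kernels/TestUtils.h"
    #include "luci_interpreter/TestMemoryManager.h"

    #include <gtest/gtest.h>
    #include <memory>

    namespace luci_interpreter
    {
    namespace kernels
    {
    namespace
    {

    class MirrorPadTest : public ::testing::Test
    {
    protected:
      void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }

      std::unique_ptr<IMemoryManager> _memory_manager;
    };

    // TODO: construct a MirrorPad kernel here once its test cases are written,
    //       following the configure() / allocate_memory() / execute() sequence
    //       used by the other kernel tests in this commit.

    } // namespace
    } // namespace kernels
    } // namespace luci_interpreter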
diff --git a/compiler/luci-interpreter/src/kernels/Mul.cpp b/compiler/luci-interpreter/src/kernels/Mul.cpp
index 4e6e3f75a..bc855de0f 100644
--- a/compiler/luci-interpreter/src/kernels/Mul.cpp
+++ b/compiler/luci-interpreter/src/kernels/Mul.cpp
@@ -20,7 +20,9 @@
#include "kernels/BinaryOpCommon.h"
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALMul.h"
+
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
#include <stdexcept>
@@ -77,15 +79,15 @@ void Mul::evalFloat() const
if (need_broadcast)
{
- tflite::optimized_ops::BroadcastMul4DSlow(
+ luci_interpreter_pal::BroadcastMul4DSlow(
params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
}
else
{
- tflite::optimized_ops::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
- getTensorShape(input2()), getTensorData<float>(input2()),
- getTensorShape(output()), getTensorData<float>(output()));
+ luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
+ getTensorShape(input2()), getTensorData<float>(input2()),
+ getTensorShape(output()), getTensorData<float>(output()));
}
}
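Mul (and Neg further down) now dispatch through luci_interpreter_pal instead of calling tflite::optimized_ops directly, so each platform backend can supply its own implementation behind a PAL header. The linux PALMul.h added in this commit presumably forwards to the same TFLite routines the kernel used to call; a hedged sketch of such a forwarding header, not the verbatim file:

    // Sketch of a PAL forwarding header for Mul (assumed, not the actual PALMul.h).
    #include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>

    namespace luci_interpreter_pal
    {

    template <typename T>
    inline void Mul(const tflite::ArithmeticParams &params,
                    const tflite::RuntimeShape &input1_shape, const T *input1_data,
                    const tflite::RuntimeShape &input2_shape, const T *input2_data,
                    const tflite::RuntimeShape &output_shape, T *output_data)
    {
      tflite::optimized_ops::Mul(params, input1_shape, input1_data, input2_shape, input2_data,
                                 output_shape, output_data);
    }

    template <typename T>
    inline void BroadcastMul4DSlow(const tflite::ArithmeticParams &params,
                                   const tflite::RuntimeShape &input1_shape, const T *input1_data,
                                   const tflite::RuntimeShape &input2_shape, const T *input2_data,
                                   const tflite::RuntimeShape &output_shape, T *output_data)
    {
      tflite::optimized_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
                                                input2_data, output_shape, output_data);
    }

    } // namespace luci_interpreter_pal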
diff --git a/compiler/luci-interpreter/src/kernels/Mul.test.cpp b/compiler/luci-interpreter/src/kernels/Mul.test.cpp
index fc7ffb5a1..471f6ac86 100644
--- a/compiler/luci-interpreter/src/kernels/Mul.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Mul.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Mul.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,7 +28,15 @@ namespace
using namespace testing;
-TEST(MulTest, Float)
+class MulTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MulTest, Float)
{
Shape base_shape = {2, 3, 1, 2};
std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
@@ -45,8 +54,10 @@ TEST(MulTest, Float)
std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
for (size_t i = 0; i < test_shapes.size(); ++i)
{
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data);
- Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data);
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
MulParams params{};
@@ -54,6 +65,7 @@ TEST(MulTest, Float)
Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
@@ -62,8 +74,10 @@ TEST(MulTest, Float)
// Re-run with exchanged inputs.
for (size_t i = 0; i < test_shapes.size(); ++i)
{
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data);
- Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data);
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
MulParams params{};
@@ -71,6 +85,7 @@ TEST(MulTest, Float)
Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
@@ -78,7 +93,7 @@ TEST(MulTest, Float)
}
}
-TEST(MulTest, SInt16)
+TEST_F(MulTest, SInt16)
{
Shape base_shape = {2, 3, 1, 2};
std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
@@ -99,9 +114,10 @@ TEST(MulTest, SInt16)
{0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}};
for (size_t i = 0; i < test_shapes.size(); ++i)
{
- Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data);
- Tensor input2_tensor =
- makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0, input2_data);
+ Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data,
+ _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0,
+ input2_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0);
const float tolerance = output_tensor.scale() * 2;
@@ -110,6 +126,7 @@ TEST(MulTest, SInt16)
Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor),
@@ -121,9 +138,10 @@ TEST(MulTest, SInt16)
// Re-run with exchanged inputs and different scales.
for (size_t i = 0; i < test_shapes.size(); ++i)
{
- Tensor input1_tensor =
- makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0, input2_data);
- Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data);
+ Tensor input1_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0,
+ input2_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data,
+ _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S16, 3.0 / 32767, 0);
const float tolerance = output_tensor.scale() * 2;
@@ -132,6 +150,7 @@ TEST(MulTest, SInt16)
Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor),
diff --git a/compiler/luci-interpreter/src/kernels/Neg.cpp b/compiler/luci-interpreter/src/kernels/Neg.cpp
index 99f4d4a21..c6fe08a9e 100644
--- a/compiler/luci-interpreter/src/kernels/Neg.cpp
+++ b/compiler/luci-interpreter/src/kernels/Neg.cpp
@@ -17,7 +17,7 @@
#include "kernels/Neg.h"
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALNeg.h"
#include <stdexcept>
@@ -50,8 +50,8 @@ void Neg::execute() const
void Neg::evalFloat() const
{
- tflite::reference_ops::Negate(getTensorShape(input()), getTensorData<float>(input()),
- getTensorShape(output()), getTensorData<float>(output()));
+ luci_interpreter_pal::Negate(getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
}
} // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/Neg.test.cpp b/compiler/luci-interpreter/src/kernels/Neg.test.cpp
index 33256e1c6..8b2bc1a82 100644
--- a/compiler/luci-interpreter/src/kernels/Neg.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Neg.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Neg.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -31,13 +32,16 @@ template <typename T>
void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
std::initializer_list<T> input_data, std::initializer_list<T> output_data)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
constexpr DataType element_type = getElementType<T>();
- Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(element_type);
Neg kernel(&input_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
diff --git a/compiler/luci-interpreter/src/kernels/NotEqual.h b/compiler/luci-interpreter/src/kernels/NotEqual.h
index d729c6c14..247874df7 100644
--- a/compiler/luci-interpreter/src/kernels/NotEqual.h
+++ b/compiler/luci-interpreter/src/kernels/NotEqual.h
@@ -42,9 +42,9 @@ private:
private:
int32_t _x_multiplier = 0;
- int32_t _x_shift = 0;
+ int _x_shift = 0;
int32_t _y_multiplier = 0;
- int32_t _y_shift = 0;
+ int _y_shift = 0;
};
} // namespace kernels
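The _x_shift / _y_shift members change from int32_t to int (PRelu.h below gets the same treatment for _output_shift_identity), presumably to match the plain-int shift parameters and out-arguments of the TFLite quantization helpers; that keeps the member type in sync with those signatures and avoids mismatches on toolchains where int32_t is not an alias of int.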
diff --git a/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp b/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp
index f9dc7781b..763f86893 100644
--- a/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/NotEqual.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,7 +28,15 @@ namespace
using namespace testing;
-TEST(NotEqualTest, FloatSimple)
+class NotEqualTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(NotEqualTest, FloatSimple)
{
std::vector<float> x_data{
0.5, 0.7, 0.9, // Row 1
@@ -44,19 +53,20 @@ TEST(NotEqualTest, FloatSimple)
true, false, true, // Row 2
};
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
- Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
}
-TEST(NotEqualTest, FloatBroardcast)
+TEST_F(NotEqualTest, FloatBroardcast)
{
std::vector<float> x_data{
0.5, 0.7, 0.9, // Row 1
@@ -76,12 +86,13 @@ TEST(NotEqualTest, FloatBroardcast)
false, false, false, // Row 4
};
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data);
- Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
@@ -92,7 +103,7 @@ TEST(NotEqualTest, FloatBroardcast)
const float F_MIN = -128.0 / 128.0;
const float F_MAX = 127.0 / 128.0;
-TEST(NotEqualTest, Uint8Quantized)
+TEST_F(NotEqualTest, Uint8Quantized)
{
std::vector<float> x_data{
0.5, 0.5, 0.7, 0.9, // Row 1
@@ -110,24 +121,25 @@ TEST(NotEqualTest, Uint8Quantized)
};
std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
- Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2);
- Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
}
-TEST(NotEqualTest, Uint8QuantizedBroadcast)
+TEST_F(NotEqualTest, Uint8QuantizedBroadcast)
{
std::vector<float> x_data{
0.4, -0.8, 0.7, 0.3, // Row 1
@@ -148,34 +160,35 @@ TEST(NotEqualTest, Uint8QuantizedBroadcast)
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
- Tensor x_tensor =
- makeInputTensor<DataType::U8>({1, 4, 4, 1}, quant_param.first, quant_param.second, x_data);
- Tensor y_tensor =
- makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 4, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 4, 1}));
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
}
-TEST(NotEqualTest, Input_Type_Mismatch_NEG)
+TEST_F(NotEqualTest, Input_Type_Mismatch_NEG)
{
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
- Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1});
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(NotEqualTest, Input_Output_Type_NEG)
+TEST_F(NotEqualTest, Input_Output_Type_NEG)
{
- Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
- Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
diff --git a/compiler/luci-interpreter/src/kernels/PRelu.cpp b/compiler/luci-interpreter/src/kernels/PRelu.cpp
index a53ac6f80..5a6b05c3a 100644
--- a/compiler/luci-interpreter/src/kernels/PRelu.cpp
+++ b/compiler/luci-interpreter/src/kernels/PRelu.cpp
@@ -19,7 +19,8 @@
#include "kernels/BinaryOpCommon.h"
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/binary_function.h>
+#include <tensorflow/lite/kernels/internal/reference/prelu.h>
#include <stdexcept>
@@ -168,10 +169,11 @@ static inline int16_t evalElemS16PRelu(int16_t input_val, int16_t alpha_val,
constexpr int32_t quantized_max = std::numeric_limits<int16_t>::max();
const int32_t output_val =
- input_val >= 0 ? tflite::MultiplyByQuantizedMultiplier(input_val, identity_mult.multiplier,
- identity_mult.shift)
- : tflite::MultiplyByQuantizedMultiplier(input_val * alpha_val,
- alpha_mult.multiplier, alpha_mult.shift);
+ input_val >= 0
+ ? tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val),
+ identity_mult.multiplier, identity_mult.shift)
+ : tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val * alpha_val),
+ alpha_mult.multiplier, alpha_mult.shift);
const int32_t clamped_output = std::min(quantized_max, std::max(quantized_min, output_val));
return clamped_output;
}
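
Note: in evalElemS16PRelu the negative branch multiplies the quantized input by the quantized alpha before rescaling. Both operands are int16, so the product can be on the order of 2^30 and has to reach MultiplyByQuantizedMultiplier as a 32-bit value; the reworked expression spells that out with static_cast<int32_t>. A standalone illustration of the magnitudes involved (made-up values, not the kernel code):

// Illustration only: why the int16 * int16 product in the negative PRelu
// branch needs the explicit 32-bit width.
#include <cstdint>
#include <iostream>

int main()
{
  const int16_t input_val = -20000; // quantized S16 input sample
  const int16_t alpha_val = 15000;  // quantized S16 alpha for its channel

  // Integer promotion already evaluates this in (at least) 32 bits; the cast
  // in the kernel makes that width explicit at the call site.
  const int32_t product = static_cast<int32_t>(input_val) * alpha_val;

  // -300000000 is far outside [-32768, 32767]; only after the fixed-point
  // rescale (MultiplyByQuantizedMultiplier) and the min/max clamp does the
  // result fit back into int16.
  std::cout << product << '\n';
  return 0;
}
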
diff --git a/compiler/luci-interpreter/src/kernels/PRelu.h b/compiler/luci-interpreter/src/kernels/PRelu.h
index e85c3f7e9..f7735d418 100644
--- a/compiler/luci-interpreter/src/kernels/PRelu.h
+++ b/compiler/luci-interpreter/src/kernels/PRelu.h
@@ -50,7 +50,7 @@ private:
std::vector<ChannelQuantMultipliers> _alpha_multipliers;
// TODO merge this into one ChannelQuantMultiplier object
int32_t _output_multiplier_identity = 0;
- int32_t _output_shift_identity = 0;
+ int _output_shift_identity = 0;
};
} // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/PRelu.test.cpp b/compiler/luci-interpreter/src/kernels/PRelu.test.cpp
index 3dbc51cc1..6d97382de 100644
--- a/compiler/luci-interpreter/src/kernels/PRelu.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/PRelu.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/PRelu.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -32,14 +33,18 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data,
std::initializer_list<T> alpha_data, std::initializer_list<T> output_data)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
constexpr DataType element_type = getElementType<T>();
- Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
- Tensor alpha_tensor = makeInputTensor<element_type>(alpha_shape, alpha_data);
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<element_type>(alpha_shape, alpha_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(element_type);
PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
@@ -97,6 +102,7 @@ float GetTolerance(float min, float max) { return (max - min) / 255.0; }
TEST(PReluTest, Uint8Simple)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
std::vector<float> input_data{-0.8f, 0.2f, 0.9f, 0.7f, 0.1f, -0.4f};
std::vector<float> alpha_data{0.5f, 0.5f, 0.5f, 0.25f, 1.0f, 0.25f};
std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, 0.7f, 0.1f, -0.1f};
@@ -104,14 +110,15 @@ TEST(PReluTest, Uint8Simple)
float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
- Tensor input_tensor =
- makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first, quant_param.second, input_data);
- Tensor alpha_tensor =
- makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first, quant_param.second, alpha_data);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 3, 1}, quant_param.first, quant_param.second, alpha_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(dequantizeTensorData(output_tensor),
@@ -147,14 +154,16 @@ TEST(PReluTest, Uint8Broadcast)
const float kMax = 127.f / 128.f;
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(kMin, kMax);
- Tensor input_tensor =
- makeInputTensor<DataType::U8>({1, 2, 2, 3}, quant_param.first, quant_param.second, input_data);
- Tensor alpha_tensor =
- makeInputTensor<DataType::U8>({1, 1, 3}, quant_param.first, quant_param.second, alpha_data);
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 2, 3}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 3}, quant_param.first, quant_param.second, alpha_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(dequantizeTensorData(output_tensor),
@@ -166,12 +175,15 @@ TEST(PReluTest, Uint8Broadcast)
TEST(PReluTest, SInt16_LWQ_NEG)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
// Rewrite this test if layer-wise quantization for sint16 becomes supported
std::vector<float> input_data(6); // data is not important
std::vector<float> alpha_data(6);
- Tensor input_tensor = makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, input_data);
- Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, alpha_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, alpha_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);
PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
@@ -180,18 +192,22 @@ TEST(PReluTest, SInt16_LWQ_NEG)
TEST(PReluTest, SInt16_CWQ_Simple)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f};
std::vector<float> alpha_data{0.5f, 0.25f};
std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f};
std::vector<float> alpha_scales{0.05f, 0.025f};
std::vector<int32_t> zerop{0, 0};
- Tensor input_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data);
- Tensor alpha_tensor = makeInputTensor<DataType::S16>({2}, alpha_scales, zerop, 0, alpha_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({2}, alpha_scales, zerop, 0, alpha_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0);
PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2}));
@@ -200,14 +216,16 @@ TEST(PReluTest, SInt16_CWQ_Simple)
TEST(PReluTest, SInt16_CWQ_spatial_alpha_NEG)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
std::vector<float> input_data(6); // data is not important
std::vector<float> alpha_data(6);
std::vector<float> alpha_scales{0.25f, 0.05f};
std::vector<int32_t> zerop{0, 0};
- Tensor input_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data);
- Tensor alpha_tensor =
- makeInputTensor<DataType::S16>({1, 1, 3, 2}, alpha_scales, zerop, 3, alpha_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, alpha_scales, zerop, 3,
+ alpha_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);
PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
@@ -216,14 +234,16 @@ TEST(PReluTest, SInt16_CWQ_spatial_alpha_NEG)
TEST(PReluTest, SInt16_CWQ_wrong_dim_quant_NEG)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
std::vector<float> input_data(6); // data is not important
std::vector<float> alpha_data(6);
std::vector<float> alpha_scales{0.25f};
std::vector<int32_t> zerop{0};
- Tensor input_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data);
- Tensor alpha_tensor =
- makeInputTensor<DataType::S16>({1, 1, 1, 2}, alpha_scales, zerop, 1, alpha_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 1, 2}, alpha_scales, zerop, 1,
+ alpha_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);
PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
@@ -232,19 +252,22 @@ TEST(PReluTest, SInt16_CWQ_wrong_dim_quant_NEG)
TEST(PReluTest, SInt16_CWQ_uneven_shape1)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f};
std::vector<float> alpha_data{0.5f, 0.25f};
std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f};
std::vector<float> alpha_scales{0.05f, 0.025f};
std::vector<int32_t> zerop{0, 0};
- Tensor input_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data);
- Tensor alpha_tensor =
- makeInputTensor<DataType::S16>({1, 1, 2}, alpha_scales, zerop, 2, alpha_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 2}, alpha_scales, zerop, 2,
+ alpha_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0);
PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2}));
@@ -253,6 +276,7 @@ TEST(PReluTest, SInt16_CWQ_uneven_shape1)
TEST(PReluTest, SInt16_CWQ_uneven_shape2)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
std::vector<float> input_data{
0.0f, 0.0f, 0.0f, // Row 1, Column 1
0.5f, 0.5f, 0.5f, // Row 1, Column 2
@@ -269,13 +293,15 @@ TEST(PReluTest, SInt16_CWQ_uneven_shape2)
std::vector<float> alpha_scales{1.f, 0.05f, 0.1f};
std::vector<int32_t> zerop{0, 0, 0};
- Tensor input_tensor = makeInputTensor<DataType::S16>({1, 2, 2, 3}, 0.01, 0, input_data);
- Tensor alpha_tensor =
- makeInputTensor<DataType::S16>({1, 1, 1, 3}, alpha_scales, zerop, 3, alpha_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 2, 2, 3}, 0.01, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 1, 3}, alpha_scales, zerop, 3,
+ alpha_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.001, 0);
PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 3}));
@@ -284,8 +310,9 @@ TEST(PReluTest, SInt16_CWQ_uneven_shape2)
TEST(PReluTest, Input_Output_Type_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
- Tensor alpha_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8);
PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
@@ -294,8 +321,9 @@ TEST(PReluTest, Input_Output_Type_NEG)
TEST(PReluTest, Input_Alpha_Type_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
- Tensor alpha_tensor = makeInputTensor<DataType::U8>({1}, {1});
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::U8>({1}, {1}, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
@@ -304,23 +332,29 @@ TEST(PReluTest, Input_Alpha_Type_NEG)
TEST(PReluTest, Invalid_Input_Type_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1});
- Tensor alpha_tensor = makeInputTensor<DataType::S64>({1}, {1});
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S64);
PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
EXPECT_ANY_THROW(kernel.execute());
}
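
Note: the SInt16_CWQ tests above and the *_CWQ_NEG cases below exercise the channel-wise overload of makeInputTensor. Going by the calls visible in these hunks, after the shape it takes a vector of per-channel scales, a vector of per-channel zero points, the index of the quantized dimension, then the float data and the memory manager. A hedged fragment (test-body context assumed) building the two-channel alpha tensor of the CWQ_Simple case:

// Fragment only; argument order follows the calls in the hunks above.
std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();

std::vector<float> alpha_data{0.5f, 0.25f};     // one alpha value per channel
std::vector<float> alpha_scales{0.05f, 0.025f}; // per-channel scale factors
std::vector<int32_t> zerop{0, 0};               // S16 is symmetric, so zero points stay 0

// The integer argument after the zero points (0 here) names the quantized
// dimension: the axis of the {2} shape indexed by the scales/zero points.
Tensor alpha_tensor =
  makeInputTensor<DataType::S16>({2}, alpha_scales, zerop, 0, alpha_data, memory_manager.get());
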
TEST(PReluTest, Input_Output_U8_CWQ_NEG)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
std::vector<float> scales{1.f, 1.f};
std::vector<int32_t> zerop{0, 0};
std::vector<float> dummy_data(4, 0.f);
- Tensor input_tensor = makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data);
- Tensor alpha_tensor = makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data);
- Tensor output_tensor = makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+ Tensor output_tensor =
+ makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
EXPECT_ANY_THROW(kernel.configure());
@@ -328,12 +362,16 @@ TEST(PReluTest, Input_Output_U8_CWQ_NEG)
TEST(PReluTest, Input_Output_S16_CWQ_NEG)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
std::vector<float> scales{1.f, 1.f};
std::vector<int32_t> zerop{0, 0};
std::vector<float> dummy_data(4, 0.f);
- Tensor input_tensor = makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data);
- Tensor alpha_tensor = makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data);
- Tensor output_tensor = makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+ Tensor output_tensor =
+ makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
EXPECT_ANY_THROW(kernel.configure());
@@ -341,10 +379,14 @@ TEST(PReluTest, Input_Output_S16_CWQ_NEG)
TEST(PReluTest, Mixing_U8_S16_NEG)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
std::vector<float> dummy_data(4, 0.f);
- Tensor input_tensor = makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data);
- Tensor alpha_tensor = makeInputTensor<DataType::S16>({2, 2}, 1.f, 0, dummy_data);
- Tensor output_tensor = makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({2, 2}, 1.f, 0, dummy_data, memory_manager.get());
+ Tensor output_tensor =
+ makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data, memory_manager.get());
PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
EXPECT_ANY_THROW(kernel.configure());
diff --git a/compiler/luci-interpreter/src/kernels/Pack.test.cpp b/compiler/luci-interpreter/src/kernels/Pack.test.cpp
index 092bd449a..90a0f894e 100644
--- a/compiler/luci-interpreter/src/kernels/Pack.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Pack.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Pack.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -31,6 +32,7 @@ void Check(std::vector<std::initializer_list<int32_t>> input_shapes,
std::initializer_list<int32_t> output_shape, std::vector<std::vector<T>> input_datas,
std::initializer_list<T> output_data, int32_t axis)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
constexpr DataType element_type = getElementType<T>();
std::vector<const Tensor *> inputs(input_datas.size());
std::vector<Tensor> tmp_inputs;
@@ -39,11 +41,13 @@ void Check(std::vector<std::initializer_list<int32_t>> input_shapes,
if (std::is_same<T, float>::value)
{
tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {}, ""));
+ memory_manager->allocate_memory(tmp_inputs[i]);
tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T));
}
else
{
tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f / 255}, {128}}, ""));
+ memory_manager->allocate_memory(tmp_inputs[i]);
tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T));
}
}
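
Note: the Pack Check helper builds its inputs as plain Tensor objects instead of going through makeInputTensor, so the added lines above call allocate_memory between constructing each tensor and writeData; with explicit memory management the tensor has no backing buffer until the manager provides one. As a fragment (constructor arguments mirror the hunk, values illustrative):

// Fragment, test-body context assumed.
std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();

std::vector<float> data{1.f, 4.f};
// Tensor(element_type, shape, quantization, name), as in the hunk above.
Tensor input(DataType::FLOAT32, {2}, {}, "");

memory_manager->allocate_memory(input); // the buffer must exist before any write
input.writeData(data.data(), data.size() * sizeof(float));
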
@@ -64,6 +68,7 @@ void Check(std::vector<std::initializer_list<int32_t>> input_shapes,
Pack kernel(inputs, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
@@ -103,12 +108,13 @@ TYPED_TEST(PackTest, NegAxis)
TEST(Pack, MismatchingInputValuesCount_NEG)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
std::vector<float> input1_data{1, 4};
std::vector<float> input2_data{2, 5};
std::vector<float> input3_data{3, 6};
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data);
- Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data);
- Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data);
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data, memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data, memory_manager.get());
+ Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
PackParams params{};
{
@@ -122,12 +128,13 @@ TEST(Pack, MismatchingInputValuesCount_NEG)
TEST(Pack, InvalidInputAxis_NEG)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
std::vector<float> input1_data{1, 4};
std::vector<float> input2_data{2, 5};
std::vector<float> input3_data{3, 6};
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data);
- Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data);
- Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data);
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data, memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data, memory_manager.get());
+ Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
PackParams params{};
{
diff --git a/compiler/luci-interpreter/src/kernels/Pad.cpp b/compiler/luci-interpreter/src/kernels/Pad.cpp
index 3e76080a9..700448e7a 100644
--- a/compiler/luci-interpreter/src/kernels/Pad.cpp
+++ b/compiler/luci-interpreter/src/kernels/Pad.cpp
@@ -18,7 +18,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/pad.h>
namespace luci_interpreter
{
diff --git a/compiler/luci-interpreter/src/kernels/Pad.test.cpp b/compiler/luci-interpreter/src/kernels/Pad.test.cpp
index 75b2e560e..7994263e2 100644
--- a/compiler/luci-interpreter/src/kernels/Pad.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Pad.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Pad.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -30,17 +31,20 @@ float GetTolerance(float min, float max) { return (max - min) / 255.0; }
TEST(Pad, Uint8)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
std::vector<float> input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3};
std::vector<int32_t> paddings_data{0, 0, 0, 2, 1, 3, 0, 0};
- Tensor input_tensor =
- makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first, quant_param.second, input_data);
- Tensor paddings_tensor = makeInputTensor<DataType::S32>({4, 2}, paddings_data);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
Pad kernel(&input_tensor, &paddings_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{0, -0.8, 0.2, 0.9, 0, 0, 0, 0, 0.7, 0.1, -0.3, 0, 0, 0,
@@ -52,14 +56,18 @@ TEST(Pad, Uint8)
TEST(Pad, Float)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
std::vector<float> input_data{1, 2, 3, 4, 5, 6};
std::vector<int32_t> paddings_data{1, 0, 0, 2, 0, 3, 0, 0};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data);
- Tensor paddings_tensor = makeInputTensor<DataType::S32>({4, 2}, paddings_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pad kernel(&input_tensor, &paddings_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
diff --git a/compiler/luci-interpreter/src/kernels/PadV2.cpp b/compiler/luci-interpreter/src/kernels/PadV2.cpp
index 3c215dbca..e90469239 100644
--- a/compiler/luci-interpreter/src/kernels/PadV2.cpp
+++ b/compiler/luci-interpreter/src/kernels/PadV2.cpp
@@ -18,7 +18,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/pad.h>
namespace luci_interpreter
{
diff --git a/compiler/luci-interpreter/src/kernels/PadV2.test.cpp b/compiler/luci-interpreter/src/kernels/PadV2.test.cpp
index 1ee741401..41efaff06 100644
--- a/compiler/luci-interpreter/src/kernels/PadV2.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/PadV2.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/PadV2.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -30,20 +31,23 @@ float GetTolerance(float min, float max) { return (max - min) / 255.0; }
TEST(PadV2, Uint8)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
std::vector<float> input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3};
std::vector<int32_t> paddings_data{0, 0, 0, 2, 1, 3, 0, 0};
std::vector<float> constant_values_data{0.5};
- Tensor input_tensor =
- makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first, quant_param.second, input_data);
- Tensor paddings_tensor = makeInputTensor<DataType::S32>({4, 2}, paddings_data);
- Tensor constant_values =
- makeInputTensor<DataType::U8>({1}, quant_param.first, quant_param.second, constant_values_data);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+ Tensor constant_values = makeInputTensor<DataType::U8>(
+ {1}, quant_param.first, quant_param.second, constant_values_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
PadV2 kernel(&input_tensor, &paddings_tensor, &constant_values, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data = {
@@ -56,16 +60,21 @@ TEST(PadV2, Uint8)
TEST(PadV2, Float)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
std::vector<float> input_data{1, 2, 3, 4, 5, 6};
std::vector<int32_t> paddings_data{1, 0, 0, 2, 0, 3, 0, 0};
std::vector<float> constant_values_data{7};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data);
- Tensor paddings_tensor = makeInputTensor<DataType::S32>({4, 2}, paddings_data);
- Tensor constant_values = makeInputTensor<DataType::FLOAT32>({1}, constant_values_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+ Tensor constant_values =
+ makeInputTensor<DataType::FLOAT32>({1}, constant_values_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
PadV2 kernel(&input_tensor, &paddings_tensor, &constant_values, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
diff --git a/compiler/luci-interpreter/src/kernels/Pow.test.cpp b/compiler/luci-interpreter/src/kernels/Pow.test.cpp
index a414440c9..0e858115d 100644
--- a/compiler/luci-interpreter/src/kernels/Pow.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Pow.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Pow.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -26,7 +27,15 @@ namespace
using namespace testing;
-TEST(PowTest, SimplePow)
+class PowTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(PowTest, SimplePow)
{
std::initializer_list<int32_t> base_shape = {1, 1, 3, 2};
@@ -34,19 +43,22 @@ TEST(PowTest, SimplePow)
std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
std::vector<float> test_outputs{0.786f, 1.2838f, 1.043f, 0.7071f, 0.8f, 1.08956f};
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data);
- Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input2_data);
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input2_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape));
}
-TEST(PowTest, FloatBroadcastPow)
+TEST_F(PowTest, FloatBroadcastPow)
{
std::initializer_list<int32_t> input1_shape = {1, 3};
std::initializer_list<int32_t> input2_shape = {3, 1};
@@ -56,60 +68,66 @@ TEST(PowTest, FloatBroadcastPow)
std::vector<float> test_outputs{0.786f, 1.18126f, 0.9791f, 0.6968f, 1.28386f,
0.96888f, 0.6178f, 1.3953f, 0.9587f};
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data);
- Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data);
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
}
-TEST(PowTest, IntPow)
+TEST_F(PowTest, IntPow)
{
std::initializer_list<int32_t> base_shape = {1, 3};
std::vector<int32_t> input_data{2, 3, 4};
std::vector<int32_t> test_outputs{4, 27, 256};
- Tensor input1_tensor = makeInputTensor<DataType::S32>(base_shape, input_data);
- Tensor input2_tensor = makeInputTensor<DataType::S32>(base_shape, input_data);
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>(base_shape, input_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::S32>(base_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S32);
Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(test_outputs));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape));
}
-TEST(PowTest, Input_Output_Type_NEG)
+TEST_F(PowTest, Input_Output_Type_NEG)
{
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f});
- Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f});
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::BOOL);
Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(PowTest, Input_Type_Mismatch_NEG)
+TEST_F(PowTest, Input_Type_Mismatch_NEG)
{
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f});
- Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {4});
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {4}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(PowTest, Invalid_Input_Type_NEG)
+TEST_F(PowTest, Invalid_Input_Type_NEG)
{
- Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1});
- Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {1});
+ Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S64);
Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
diff --git a/compiler/luci-interpreter/src/kernels/Relu.cpp b/compiler/luci-interpreter/src/kernels/Relu.cpp
index b5acf1d60..747ec6cc8 100644
--- a/compiler/luci-interpreter/src/kernels/Relu.cpp
+++ b/compiler/luci-interpreter/src/kernels/Relu.cpp
@@ -17,7 +17,7 @@
#include "kernels/Relu.h"
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALRelu.h"
#include <stdexcept>
@@ -70,7 +70,7 @@ void Relu::evalFloat() const
auto output_data = getTensorData<float>(output());
auto output_shape = getTensorShape(output());
- tflite::optimized_ops::Relu(input_shape, input_data, output_shape, output_data);
+ luci_interpreter_pal::Relu(input_shape, input_data, output_shape, output_data);
}
void Relu::evalQuantized() const
@@ -85,8 +85,8 @@ void Relu::evalQuantized() const
std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
params.quantized_activation_max = static_cast<int32_t>(std::numeric_limits<uint8_t>::max());
- tflite::optimized_ops::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
- getTensorShape(output()), getTensorData<uint8_t>(output()));
+ luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
}
void Relu::evalQuantizedS16() const
diff --git a/compiler/luci-interpreter/src/kernels/Relu.test.cpp b/compiler/luci-interpreter/src/kernels/Relu.test.cpp
index 6623a5b77..bd32e3cc9 100644
--- a/compiler/luci-interpreter/src/kernels/Relu.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Relu.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Relu.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,7 +28,15 @@ namespace
using namespace testing;
-TEST(ReluTest, FloatSimple)
+class ReluTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(ReluTest, FloatSimple)
{
std::vector<float> input_data{
0.0f, 1.0f, 3.0f, // Row 1
@@ -39,11 +48,13 @@ TEST(ReluTest, FloatSimple)
1.0f, 0.0f, 0.0f, // Row 2
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Relu kernel(&input_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor),
@@ -51,7 +62,7 @@ TEST(ReluTest, FloatSimple)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
}
-TEST(ReluTest, Uint8Quantized)
+TEST_F(ReluTest, Uint8Quantized)
{
std::vector<float> input_data{
0, -6, 2, 4, //
@@ -62,12 +73,13 @@ TEST(ReluTest, Uint8Quantized)
const float f_max = (127.0 / 128.0) * 8;
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max);
- Tensor input_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
Relu kernel(&input_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
@@ -76,7 +88,7 @@ TEST(ReluTest, Uint8Quantized)
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0, 0, 2, 4, 3, 0, 7, 1}));
}
-TEST(ReluTest, Uint8Requantized)
+TEST_F(ReluTest, Uint8Requantized)
{
std::vector<float> input_data{
0, -6, 2, 4, //
@@ -90,14 +102,15 @@ TEST(ReluTest, Uint8Requantized)
const float out_max = (255.0 / 256.0) * 8;
std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max);
- Tensor input_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_input.first, quant_input.second, input_data);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_input.first, quant_input.second, input_data, _memory_manager.get());
std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second);
Relu kernel(&input_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
@@ -106,7 +119,7 @@ TEST(ReluTest, Uint8Requantized)
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0, 0, 2, 4, 3, 0, 7, 1}));
}
-TEST(ReluTest, SInt16)
+TEST_F(ReluTest, SInt16)
{
std::vector<float> input_data{
0, -6, 2, 4, //
@@ -117,33 +130,36 @@ TEST(ReluTest, SInt16)
3, 0, 7, 1, //
};
- Tensor input_tensor = makeInputTensor<DataType::S16>({1, 2, 4, 1}, 0.5, 0, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 2, 4, 1}, 0.5, 0, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.25, 0);
Relu kernel(&input_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
}
-TEST(ReluTest, Input_Output_Type_NEG)
+TEST_F(ReluTest, Input_Output_Type_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8);
Relu kernel(&input_tensor, &output_tensor);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(ReluTest, Invalid_Input_Type_NEG)
+TEST_F(ReluTest, Invalid_Input_Type_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1});
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S64);
Relu kernel(&input_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
EXPECT_ANY_THROW(kernel.execute());
}
diff --git a/compiler/luci-interpreter/src/kernels/Relu6.cpp b/compiler/luci-interpreter/src/kernels/Relu6.cpp
index fa7aa504a..07205ed3a 100644
--- a/compiler/luci-interpreter/src/kernels/Relu6.cpp
+++ b/compiler/luci-interpreter/src/kernels/Relu6.cpp
@@ -17,7 +17,7 @@
#include "kernels/Relu6.h"
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALRelu6.h"
#include <stdexcept>
@@ -63,7 +63,7 @@ void Relu6::evalFloat() const
auto output_data = getTensorData<float>(output());
auto output_shape = getTensorShape(output());
- tflite::optimized_ops::Relu6(input_shape, input_data, output_shape, output_data);
+ luci_interpreter_pal::Relu6(input_shape, input_data, output_shape, output_data);
}
void Relu6::evalQuantized() const
@@ -80,8 +80,8 @@ void Relu6::evalQuantized() const
std::min(static_cast<int32_t>(std::numeric_limits<uint8_t>::max()),
params.output_offset + static_cast<int32>(roundf(6.f / output()->scale())));
- tflite::optimized_ops::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
- getTensorShape(output()), getTensorData<uint8_t>(output()));
+ luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
}
} // namespace kernels
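
Note: Relu.cpp and Relu6.cpp (and, further down, the Resize kernels) stop including TFLite's monolithic optimized_ops.h and instead call into luci_interpreter_pal through the new PAL headers included above (PALRelu.h, PALRelu6.h, PALResizeBilinear.h, PALResizeNearestNeighbor.h); the remaining direct TFLite includes are also narrowed to per-op reference headers such as pad.h and prelu.h. On the default platform the PAL layer presumably just forwards to the same TFLite routines the kernels called before — the sketch below is written under that assumption and is not the actual header:

/* Hypothetical sketch of a PAL header; the real PALRelu.h may differ. */
#ifndef LUCI_INTERPRETER_PAL_RELU_H
#define LUCI_INTERPRETER_PAL_RELU_H

#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>

namespace luci_interpreter_pal
{
// Forward the float ReLU to the TFLite optimized implementation; kernels now
// depend only on this namespace, so other targets can plug in a different body.
static inline void Relu(const tflite::RuntimeShape &input_shape, const float *input_data,
                        const tflite::RuntimeShape &output_shape, float *output_data)
{
  tflite::optimized_ops::Relu(input_shape, input_data, output_shape, output_data);
}

// Quantized ReLU-X, used by both the Relu and Relu6 eval paths above.
static inline void ReluX(const tflite::ReluParams &params, const tflite::RuntimeShape &input_shape,
                         const uint8_t *input_data, const tflite::RuntimeShape &output_shape,
                         uint8_t *output_data)
{
  tflite::optimized_ops::ReluX(params, input_shape, input_data, output_shape, output_data);
}
} // namespace luci_interpreter_pal

#endif // LUCI_INTERPRETER_PAL_RELU_H
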
diff --git a/compiler/luci-interpreter/src/kernels/Relu6.test.cpp b/compiler/luci-interpreter/src/kernels/Relu6.test.cpp
index fe991389a..af7b3f3db 100644
--- a/compiler/luci-interpreter/src/kernels/Relu6.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Relu6.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Relu6.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,7 +28,15 @@ namespace
using namespace testing;
-TEST(Relu6Test, FloatSimple)
+class Relu6Test : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(Relu6Test, FloatSimple)
{
std::vector<float> input_data{
0.0f, 1.0f, 3.0f, // Row 1
@@ -39,11 +48,13 @@ TEST(Relu6Test, FloatSimple)
6.0f, 0.0f, 0.0f, // Row 2
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Relu6 kernel(&input_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor),
@@ -51,7 +62,7 @@ TEST(Relu6Test, FloatSimple)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
}
-TEST(Relu6Test, Uint8Quantized)
+TEST_F(Relu6Test, Uint8Quantized)
{
// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
const float f_min = (-128.0 / 128.0) * 10;
@@ -64,12 +75,13 @@ TEST(Relu6Test, Uint8Quantized)
};
std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max);
- Tensor input_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
Relu6 kernel(&input_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
@@ -79,7 +91,7 @@ TEST(Relu6Test, Uint8Quantized)
FloatArrayNear({0, 0, 2, 6, 0, 3, 6, 1}, tolerance));
}
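
Note: these quantized ReLU tests pick their float ranges so that, as the comment above says, the uint8 grid has exactly 256 evenly spaced units. Under the conventional asymmetric mapping (an assumption here; the project's quantizationParams<uint8_t> helper in TestUtils may also nudge or clamp the zero point) the parameters are scale = (max - min) / 255 and zero_point = round(-min / scale). A small sketch of that computation, with the numbers worked out for the f_min above and its presumed (127.0 / 128.0) * 10 counterpart:

// Sketch only: conventional asymmetric uint8 quantization parameters.
#include <cmath>
#include <cstdint>
#include <utility>

std::pair<float, int32_t> asymmetricParamsSketch(float f_min, float f_max)
{
  const float scale = (f_max - f_min) / 255.0f; // real-valued size of one uint8 step
  const auto zero_point = static_cast<int32_t>(std::round(-f_min / scale)); // code of real 0.0
  return {scale, zero_point};
}

// For f_min = (-128.0 / 128.0) * 10 and an assumed f_max = (127.0 / 128.0) * 10:
// scale = 19.921875 / 255 = 10 / 128 = 0.078125 and zero_point = round(10 / 0.078125) = 128,
// so every multiple of 0.078125 in the test data quantizes and dequantizes without error.
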
-TEST(Relu6Test, Uint8Requantized)
+TEST_F(Relu6Test, Uint8Requantized)
{
// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
const float in_min = (-128.0 / 128.0) * 10;
@@ -94,14 +106,15 @@ TEST(Relu6Test, Uint8Requantized)
};
std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max);
- Tensor input_tensor =
- makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_input.first, quant_input.second, input_data);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_input.first, quant_input.second, input_data, _memory_manager.get());
std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max);
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second);
Relu6 kernel(&input_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
@@ -111,22 +124,23 @@ TEST(Relu6Test, Uint8Requantized)
FloatArrayNear({0, 0, 2, 6, 0, 3, 6, 1}, tolerance));
}
-TEST(Relu6Test, Input_Output_Type_NEG)
+TEST_F(Relu6Test, Input_Output_Type_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8);
Relu6 kernel(&input_tensor, &output_tensor);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(Relu6Test, Invalid_Input_Type_NEG)
+TEST_F(Relu6Test, Invalid_Input_Type_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1});
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S64);
Relu6 kernel(&input_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
EXPECT_ANY_THROW(kernel.execute());
}
diff --git a/compiler/luci-interpreter/src/kernels/Reshape.test.cpp b/compiler/luci-interpreter/src/kernels/Reshape.test.cpp
index 38159380f..c2ff3ea1b 100644
--- a/compiler/luci-interpreter/src/kernels/Reshape.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Reshape.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Reshape.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -26,37 +27,51 @@ namespace
using namespace testing;
+class ReshapeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
// TODO Test types other than FLOAT32.
-TEST(ReshapeTest, Regular)
+TEST_F(ReshapeTest, Regular)
{
Shape input_shape{1, 2, 2, 3};
std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
Shape shape_shape{2};
std::vector<int32_t> shape_data{3, 4};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor shape_tensor = makeInputTensor<DataType::S32>(shape_shape, shape_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor shape_tensor =
+ makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data));
}
-TEST(ReshapeTest, UnknownDimension)
+TEST_F(ReshapeTest, UnknownDimension)
{
Shape input_shape{2, 1, 2, 3};
std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
Shape shape_shape{3};
std::vector<int32_t> shape_data{2, -1, 2};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor shape_tensor = makeInputTensor<DataType::S32>(shape_shape, shape_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor shape_tensor =
+ makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data));
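
Note: the UnknownDimension case above relies on the usual Reshape rule: at most one entry of the requested shape may be -1, and it is inferred so the element counts match. For the values in this test the input {2, 1, 2, 3} holds 12 elements and the requested shape is {2, -1, 2}, so the wildcard resolves to 12 / (2 * 2) = 3 and the output shape is {2, 3, 2}. A small sketch of that computation (not the kernel's code):

// Sketch only: how a -1 entry in the requested shape is resolved.
#include <cassert>
#include <cstdint>
#include <vector>

std::vector<int32_t> resolveShapeSketch(int32_t num_elements, std::vector<int32_t> shape)
{
  int32_t known_product = 1;
  int unknown_index = -1;
  for (int i = 0; i < static_cast<int>(shape.size()); ++i)
  {
    if (shape[i] == -1)
      unknown_index = i; // at most one wildcard is allowed
    else
      known_product *= shape[i];
  }
  assert(known_product != 0 && num_elements % known_product == 0);
  if (unknown_index >= 0)
    shape[unknown_index] = num_elements / known_product; // 12 / (2 * 2) == 3 for the test above
  return shape;
}
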
diff --git a/compiler/luci-interpreter/src/kernels/ResizeBilinear.cpp b/compiler/luci-interpreter/src/kernels/ResizeBilinear.cpp
index 0e9bcc920..e2ddd6a7b 100644
--- a/compiler/luci-interpreter/src/kernels/ResizeBilinear.cpp
+++ b/compiler/luci-interpreter/src/kernels/ResizeBilinear.cpp
@@ -19,7 +19,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALResizeBilinear.h"
namespace luci_interpreter
{
@@ -56,12 +56,12 @@ void ResizeBilinear::execute() const
switch (output()->element_type())
{
case DataType::FLOAT32:
- tflite::optimized_ops::ResizeBilinear(
+ luci_interpreter_pal::ResizeBilinear(
op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(size()),
getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<float>(output()));
break;
case DataType::U8:
- tflite::optimized_ops::ResizeBilinear(
+ luci_interpreter_pal::ResizeBilinear(
op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
break;
diff --git a/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp b/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
index 68ef6e6c1..7af20f8c4 100644
--- a/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/ResizeBilinear.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -33,8 +34,10 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
std::initializer_list<int32_t> size_data, std::initializer_list<float> output_data,
bool align_corners, bool half_pixel_centers)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data);
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ResizeBilinearParams params{};
@@ -43,6 +46,7 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
@@ -60,8 +64,11 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
{
// The TFLite example uses the uint8 values themselves, which corresponds to a quant param
// scale of 1.0f and a zero point of 0.
- Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, 1.0, 0, input_data);
- Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data);
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, 1.0, 0, input_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, 1.0, 0);
ResizeBilinearParams params{};
@@ -70,6 +77,7 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
@@ -152,13 +160,17 @@ TEST(ResizeBilinearTest, HalfPixelCenterUint8Test)
TEST(ResizeBilinearTest, InputShapeInvalid_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2}, {
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
- });
- Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3});
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ResizeBilinearParams params{};
@@ -171,13 +183,17 @@ TEST(ResizeBilinearTest, InputShapeInvalid_NEG)
TEST(ResizeBilinearTest, SizeShapeInvalid_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, {
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
- });
- Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3});
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ResizeBilinearParams params{};
@@ -190,13 +206,17 @@ TEST(ResizeBilinearTest, SizeShapeInvalid_NEG)
TEST(ResizeBilinearTest, SizeDimInvalid_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, {
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
- });
- Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1});
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ResizeBilinearParams params{};
@@ -209,13 +229,17 @@ TEST(ResizeBilinearTest, SizeDimInvalid_NEG)
TEST(ResizeBilinearTest, InvalidParams_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, {
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
- });
- Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3});
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ResizeBilinearParams params{};
diff --git a/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp
index c52264997..306cefbc2 100644
--- a/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp
+++ b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp
@@ -19,8 +19,8 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
+#include "PALResizeNearestNeighbor.h"
namespace luci_interpreter
{
@@ -61,7 +61,7 @@ void ResizeNearestNeighbor::execute() const
getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<int32_t>(output()));
break;
case DataType::U8:
- tflite::optimized_ops::ResizeNearestNeighbor(
+ luci_interpreter_pal::ResizeNearestNeighbor(
op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
break;
diff --git a/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
index 0b36a29af..0e9017c78 100644
--- a/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/ResizeNearestNeighbor.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -33,8 +34,11 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
std::initializer_list<int32_t> size_data, std::initializer_list<float> output_data,
bool align_corners, bool half_pixel_centers)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data);
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ResizeNearestNeighborParams params{};
@@ -43,6 +47,7 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
@@ -58,12 +63,14 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
std::initializer_list<float> output_data, bool align_corners,
bool half_pixel_centers)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
std::pair<float, int32_t> quant_param =
quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f,
std::max(input_data) > 0 ? std::max(input_data) : 0.f);
- Tensor input_tensor =
- makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data);
- Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.first);
ResizeNearestNeighborParams params{};
@@ -72,6 +79,7 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
@@ -151,13 +159,17 @@ TYPED_TEST(ResizeNearestNeighborTest, HalfPixelCenterTest)
TEST(ResizeNearestNeighborTest, InputShapeInvalid_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2}, {
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
- });
- Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3});
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ResizeNearestNeighborParams params{};
@@ -170,13 +182,17 @@ TEST(ResizeNearestNeighborTest, InputShapeInvalid_NEG)
TEST(ResizeNearestNeighborTest, SizeShapeInvalid_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, {
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
- });
- Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3});
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ResizeNearestNeighborParams params{};
@@ -189,13 +205,17 @@ TEST(ResizeNearestNeighborTest, SizeShapeInvalid_NEG)
TEST(ResizeNearestNeighborTest, SizeDimInvalid_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, {
- 3, 6, //
- 9, 12, //
- 4, 10, //
- 10, 16 //
- });
- Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1});
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
ResizeNearestNeighborParams params{};
diff --git a/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp b/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp
index 6e1e6c03c..2bd94875b 100644
--- a/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/ReverseV2.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -36,6 +37,8 @@ TYPED_TEST_CASE(ReverseV2Test, DataTypes);
TYPED_TEST(ReverseV2Test, MultiDimensions)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
// TypeParam
std::vector<TypeParam> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
@@ -47,13 +50,15 @@ TYPED_TEST(ReverseV2Test, MultiDimensions)
17, 18, 15, 16, 13, 14, 23, 24, 21, 22, 19, 20};
std::vector<int32_t> output_shape{4, 3, 2};
- Tensor input_tensor = makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data);
- Tensor axis_tensor = makeInputTensor<DataType::S32>(axis_shape, axis_data);
+ Tensor input_tensor =
+ makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>(axis_shape, axis_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
ReverseV2 kernel = ReverseV2(&input_tensor, &axis_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
diff --git a/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp b/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp
index b93a04ddd..3c6494232 100644
--- a/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Rsqrt.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -29,11 +30,15 @@ using namespace testing;
void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
std::initializer_list<float> input_data, std::initializer_list<float> output_data)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Rsqrt kernel(&input_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
@@ -58,7 +63,9 @@ TEST(RsqrtTest, SimpleRsqrt)
TEST(RsqrtTest, Input_Output_Type_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S32);
Rsqrt kernel(&input_tensor, &output_tensor);
@@ -67,11 +74,14 @@ TEST(RsqrtTest, Input_Output_Type_NEG)
TEST(RsqrtTest, Invalid_Input_Type_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1});
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S64);
Rsqrt kernel(&input_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
EXPECT_ANY_THROW(kernel.execute());
}
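Every converted test hunk in this patch follows the same lifecycle: build a TestMemoryManager, hand it to makeInputTensor() so input buffers are allocated and filled immediately, let configure() size the output, then allocate the output through the manager before execute(). The following is a minimal sketch of that flow, not part of the patch; it reuses only names visible in the hunks above (IMemoryManager, TestMemoryManager, makeInputTensor, makeOutputTensor, allocate_memory, Rsqrt), and the test name is made up for illustration.

  #include "kernels/Rsqrt.h"
  #include "kernels/TestUtils.h"
  #include "luci_interpreter/TestMemoryManager.h"

  namespace luci_interpreter
  {
  namespace kernels
  {
  namespace
  {

  using namespace testing;

  TEST(MemoryManagerFlowSketch, UnaryKernel)
  {
    std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();

    // Input buffers are allocated and written inside makeInputTensor.
    Tensor input_tensor =
      makeInputTensor<DataType::FLOAT32>({4}, {1.f, 4.f, 9.f, 16.f}, memory_manager.get());
    Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);

    Rsqrt kernel(&input_tensor, &output_tensor);
    kernel.configure();                             // output shape becomes known here
    memory_manager->allocate_memory(output_tensor); // so allocation happens after configure()
    kernel.execute();

    EXPECT_THAT(extractTensorData<float>(output_tensor),
                FloatArrayNear({1.f, 0.5f, 1.f / 3.f, 0.25f}));
  }

  } // namespace
  } // namespace kernels
  } // namespace luci_interpreter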
diff --git a/compiler/luci-interpreter/src/kernels/Slice.cpp b/compiler/luci-interpreter/src/kernels/Slice.cpp
index 626521815..37a834a18 100644
--- a/compiler/luci-interpreter/src/kernels/Slice.cpp
+++ b/compiler/luci-interpreter/src/kernels/Slice.cpp
@@ -16,7 +16,7 @@
#include "kernels/Slice.h"
#include "Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALSlice.h"
#include <cassert>
#include <cstring>
@@ -131,14 +131,13 @@ void Slice::execute() const
switch (input()->element_type())
{
case DataType::FLOAT32:
- tflite::optimized_ops::Slice(op_params, getTensorShape(input()),
- getTensorData<float>(input()), getTensorShape(output()),
- getTensorData<float>(output()));
+ luci_interpreter_pal::Slice(op_params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
break;
case DataType::U8:
- tflite::optimized_ops::Slice(op_params, getTensorShape(input()),
- getTensorData<uint8_t>(input()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ luci_interpreter_pal::Slice(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
break;
default:
throw std::runtime_error("Unsupported input type.");
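Alongside the memory-manager change, the kernel sources in this patch replace direct tflite::optimized_ops calls with luci_interpreter_pal wrappers pulled in through per-operation PAL headers (here "PALSlice.h"), so each platform build can choose its backing implementation. The wrapper itself is not shown in this hunk; as a sketch only, under the assumption that the linux PAL simply forwards to the TFLite optimized kernel (the call signature matches the one removed above), it could look like:

  #ifndef LUCI_INTERPRETER_PAL_SLICE_SKETCH_H
  #define LUCI_INTERPRETER_PAL_SLICE_SKETCH_H

  #include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>

  namespace luci_interpreter_pal
  {
  template <typename T>
  static inline void Slice(const tflite::SliceParams &op_params,
                           const tflite::RuntimeShape &input_shape, const T *input_data,
                           const tflite::RuntimeShape &output_shape, T *output_data)
  {
    // Delegate to the optimized TFLite implementation where the host can afford it.
    tflite::optimized_ops::Slice(op_params, input_shape, input_data, output_shape, output_data);
  }
  } // namespace luci_interpreter_pal

  #endif // LUCI_INTERPRETER_PAL_SLICE_SKETCH_H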
diff --git a/compiler/luci-interpreter/src/kernels/Slice.test.cpp b/compiler/luci-interpreter/src/kernels/Slice.test.cpp
index a360a29cc..3e0d0b0d7 100644
--- a/compiler/luci-interpreter/src/kernels/Slice.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Slice.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Slice.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -35,6 +36,8 @@ TYPED_TEST_CASE(SliceTest, DataTypes);
TYPED_TEST(SliceTest, SimpleTest)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
std::vector<TypeParam> input_data{1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6};
Shape input_shape{3, 2, 3, 1};
std::vector<int32_t> begin_data{1, 0, 0, 0};
@@ -44,14 +47,17 @@ TYPED_TEST(SliceTest, SimpleTest)
std::vector<TypeParam> output_data{3, 3, 3, 5, 5, 5};
std::vector<int32_t> output_shape{2, 1, 3, 1};
- Tensor input_tensor = makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data);
- Tensor begin_tensor = makeInputTensor<DataType::S32>(begin_shape, begin_data);
- Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data);
+ Tensor input_tensor =
+ makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get());
+ Tensor begin_tensor =
+ makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
Slice kernel(&input_tensor, &begin_tensor, &size_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
diff --git a/compiler/luci-interpreter/src/kernels/Softmax.cpp b/compiler/luci-interpreter/src/kernels/Softmax.cpp
index 8e29f53ee..c230aaa70 100644
--- a/compiler/luci-interpreter/src/kernels/Softmax.cpp
+++ b/compiler/luci-interpreter/src/kernels/Softmax.cpp
@@ -19,7 +19,7 @@
#include "kernels/Utils.h"
#include <tensorflow/lite/kernels/internal/reference/softmax.h>
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALSoftmax.h"
#include <stdexcept>
@@ -40,10 +40,12 @@ void Softmax::configure()
LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= 1);
if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S8)
{
- LUCI_INTERPRETER_CHECK(output()->zero_point() == 0);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8 || output()->zero_point() == 0);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::U8 ||
+ output()->zero_point() == std::numeric_limits<int8_t>::min());
tflite::SoftmaxParams op_params{};
op_params.table = _table;
- tflite::optimized_ops::PopulateSoftmaxLookupTable(&op_params, input()->scale(), params().beta);
+ luci_interpreter_pal::PopulateSoftmaxLookupTable(&op_params, input()->scale(), params().beta);
}
output()->resize(input()->shape());
}
@@ -81,9 +83,9 @@ template <typename T> void Softmax::evalQuantized() const
op_params.table = const_cast<float *>(_table);
op_params.zero_point = output()->zero_point();
op_params.scale = output()->scale();
-
- tflite::optimized_ops::Softmax(op_params, getTensorShape(input()), getTensorData<T>(input()),
- getTensorShape(output()), getTensorData<T>(output()));
+ luci_interpreter_pal::InitializeParams(&op_params, input()->scale(), params().beta);
+ luci_interpreter_pal::Softmax(op_params, getTensorShape(input()), getTensorData<T>(input()),
+ getTensorShape(output()), getTensorData<T>(output()));
}
} // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/Softmax.test.cpp b/compiler/luci-interpreter/src/kernels/Softmax.test.cpp
index c69a2f9cc..9de40b6ec 100644
--- a/compiler/luci-interpreter/src/kernels/Softmax.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Softmax.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Softmax.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -26,46 +27,60 @@ namespace
using namespace testing;
-template <typename T>
+template <typename T> constexpr loco::DataType toLocoDataType();
+
+template <> constexpr loco::DataType toLocoDataType<float>() { return loco::DataType::FLOAT32; }
+
+template <> constexpr loco::DataType toLocoDataType<uint8_t>() { return loco::DataType::U8; }
+
+template <> constexpr loco::DataType toLocoDataType<int8_t>() { return loco::DataType::S8; }
+
+template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
std::initializer_list<float> input_data, std::initializer_list<float> output_data)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<toLocoDataType<T>()>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(toLocoDataType<T>());
SoftmaxParams params{};
params.beta = 0.1;
Softmax kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
}
-template <>
-void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
- std::initializer_list<int32_t> output_shape,
- std::initializer_list<float> input_data,
- std::initializer_list<float> output_data)
+template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
std::pair<float, int32_t> input_quant_param =
- quantizationParams<uint8_t>(std::min<float>(std::min<float>(input_data), 0.f),
- std::max<float>(std::max<float>(input_data), 0.f));
+ quantizationParams<T>(std::min<float>(std::min<float>(input_data), 0.f),
+ std::max<float>(std::max<float>(input_data), 0.f));
std::pair<float, int32_t> output_quant_param =
- quantizationParams<uint8_t>(std::min<float>(std::min<float>(output_data), 0.f),
- std::max<float>(std::max<float>(output_data), 0.f));
- Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first,
- input_quant_param.second, input_data);
+ quantizationParams<T>(std::min<float>(std::min<float>(output_data), 0.f),
+ std::max<float>(std::max<float>(output_data), 0.f));
+ Tensor input_tensor = makeInputTensor<toLocoDataType<T>()>(input_shape, input_quant_param.first,
+ input_quant_param.second, input_data,
+ memory_manager.get());
Tensor output_tensor =
- makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ makeOutputTensor(toLocoDataType<T>(), output_quant_param.first, output_quant_param.second);
SoftmaxParams params{};
params.beta = 0.1;
Softmax kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
@@ -77,7 +92,7 @@ template <typename T> class SoftmaxTest : public ::testing::Test
{
};
-using DataTypes = ::testing::Types<float, uint8_t>;
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
TYPED_TEST_CASE(SoftmaxTest, DataTypes);
TYPED_TEST(SoftmaxTest, Simple)
diff --git a/compiler/luci-interpreter/src/kernels/SpaceToBatchND.cpp b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.cpp
index 2f6a47925..630cd38c4 100644
--- a/compiler/luci-interpreter/src/kernels/SpaceToBatchND.cpp
+++ b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.cpp
@@ -18,7 +18,7 @@
#include "kernels/SpaceToBatchND.h"
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALSpaceToBatchND.h"
#include <stdexcept>
@@ -80,7 +80,7 @@ void SpaceToBatchND::execute() const
tflite::SpaceToBatchParams op_params;
case DataType::FLOAT32:
op_params.output_offset = 0;
- tflite::optimized_ops::SpaceToBatchND(
+ luci_interpreter_pal::SpaceToBatchND(
op_params, getTensorShape(input()), getTensorData<float>(input()),
getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
@@ -88,7 +88,7 @@ void SpaceToBatchND::execute() const
break;
case DataType::U8:
op_params.output_offset = output()->zero_point();
- tflite::optimized_ops::SpaceToBatchND(
+ luci_interpreter_pal::SpaceToBatchND(
op_params, getTensorShape(input()), getTensorData<uint8_t>(input()),
getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
diff --git a/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp
index a6ec6f23f..e06501c8c 100644
--- a/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/SpaceToBatchND.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -34,14 +35,19 @@ void Check(std::initializer_list<int32_t> input_shape,
std::initializer_list<int32_t> block_shape_data,
std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
constexpr DataType element_type = getElementType<T>();
- Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
- Tensor block_shape_tensor = makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data);
- Tensor paddings_tensor = makeInputTensor<DataType::S32>(paddings_shape, paddings_data);
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor block_shape_tensor =
+ makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>(paddings_shape, paddings_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(element_type);
SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
@@ -55,17 +61,23 @@ void Check<uint8_t>(
std::initializer_list<float> input_data, std::initializer_list<int32_t> block_shape_data,
std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
std::pair<float, int32_t> input_quant_param =
quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
- Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first,
- input_quant_param.second, input_data);
- Tensor block_shape_tensor = makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data);
- Tensor paddings_tensor = makeInputTensor<DataType::S32>(paddings_shape, paddings_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, memory_manager.get());
+ Tensor block_shape_tensor =
+ makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>(paddings_shape, paddings_data, memory_manager.get());
Tensor output_tensor =
makeOutputTensor(DataType::U8, input_quant_param.first, input_quant_param.second);
SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(dequantizeTensorData(output_tensor),
@@ -93,10 +105,13 @@ TYPED_TEST(SpaceToBatchNDTest, Simple)
TEST(SpaceToBatchNDTest, Invalid_Shape_NEG)
{
- Tensor input_tensor =
- makeInputTensor<DataType::FLOAT32>({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
- Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2});
- Tensor paddings_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0});
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, memory_manager.get());
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0}, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
diff --git a/compiler/luci-interpreter/src/kernels/SpaceToDepth.cpp b/compiler/luci-interpreter/src/kernels/SpaceToDepth.cpp
index fc999372a..7c29e8cb0 100644
--- a/compiler/luci-interpreter/src/kernels/SpaceToDepth.cpp
+++ b/compiler/luci-interpreter/src/kernels/SpaceToDepth.cpp
@@ -16,7 +16,7 @@
#include "SpaceToDepth.h"
#include "Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALSpaceToDepth.h"
namespace luci_interpreter
{
@@ -61,14 +61,14 @@ void SpaceToDepth::execute() const
switch (input()->element_type())
{
case DataType::FLOAT32:
- tflite::optimized_ops::SpaceToDepth(op_params, getTensorShape(input()),
- getTensorData<float>(input()), getTensorShape(output()),
- getTensorData<float>(output()));
+ luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
break;
case DataType::U8:
- tflite::optimized_ops::SpaceToDepth(op_params, getTensorShape(input()),
- getTensorData<uint8_t>(input()), getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+ luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
break;
default:
throw std::runtime_error("Unsupported type.");
diff --git a/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp b/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
index 77b6655dc..735c010b9 100644
--- a/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/SpaceToDepth.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -35,10 +36,13 @@ TYPED_TEST_CASE(SpaceToDepthTest, DataTypes);
TYPED_TEST(SpaceToDepthTest, SimpleCase)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
constexpr DataType element_type = getElementType<TypeParam>();
std::vector<TypeParam> input_data{1, 5, 6, 7, 2, 3, 4, 8};
Shape input_shape{1, 2, 2, 2};
- Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
std::vector<TypeParam> output_data{1, 5, 6, 7, 2, 3, 4, 8};
std::vector<int32_t> output_shape{1, 1, 1, 8};
Tensor output_tensor = makeOutputTensor(element_type);
@@ -48,6 +52,7 @@ TYPED_TEST(SpaceToDepthTest, SimpleCase)
SpaceToDepth kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
diff --git a/compiler/luci-interpreter/src/kernels/Split.cpp b/compiler/luci-interpreter/src/kernels/Split.cpp
index 0da0f3779..1a563f307 100644
--- a/compiler/luci-interpreter/src/kernels/Split.cpp
+++ b/compiler/luci-interpreter/src/kernels/Split.cpp
@@ -18,7 +18,7 @@
#include "Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALSplit.h"
namespace luci_interpreter
{
@@ -56,11 +56,11 @@ void Split::execute() const
params.num_split = _outputs.size();
params.axis = _axis_value;
-#define TF_LITE_SPLIT(scalar) \
- { \
- VectorOfTensors<scalar, false> all_outputs(_outputs); \
- tflite::optimized_ops::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \
- all_outputs.shapes(), all_outputs.data()); \
+#define TF_LITE_SPLIT(scalar) \
+ { \
+ VectorOfTensors<scalar, false> all_outputs(_outputs); \
+ luci_interpreter_pal::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \
+ all_outputs.shapes(), all_outputs.data()); \
}
switch (input()->element_type())
diff --git a/compiler/luci-interpreter/src/kernels/Split.test.cpp b/compiler/luci-interpreter/src/kernels/Split.test.cpp
index c558928e8..74d57aed3 100644
--- a/compiler/luci-interpreter/src/kernels/Split.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Split.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Split.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -32,9 +33,12 @@ void Check(int axis, int num_splits, std::initializer_list<int32_t> input_shape,
std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data,
std::vector<std::vector<T>> output_data)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
constexpr DataType element_type = getElementType<T>();
- Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis});
- Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis}, memory_manager.get());
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
std::vector<Tensor> output_tensors;
output_tensors.reserve(num_splits);
@@ -51,6 +55,10 @@ void Check(int axis, int num_splits, std::initializer_list<int32_t> input_shape,
Split kernel(&axis_tensor, &input_tensor, std::move(output_tensor_ptrs));
kernel.configure();
+ for (int i = 0; i < num_splits; ++i)
+ {
+ memory_manager->allocate_memory(output_tensors[i]);
+ }
kernel.execute();
for (int i = 0; i < num_splits; ++i)
diff --git a/compiler/luci-interpreter/src/kernels/SplitV.cpp b/compiler/luci-interpreter/src/kernels/SplitV.cpp
new file mode 100644
index 000000000..281988272
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SplitV.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SplitV.h"
+
+#include "Utils.h"
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+#include <numeric>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+SplitV::SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis,
+ std::vector<Tensor *> outputs)
+ : Kernel({input, size_splits, axis}, std::move(outputs))
+{
+}
+
+void SplitV::configure()
+{
+ assert(axis()->shape().num_elements() == 1);
+ _axis_value = getTensorData<int32_t>(axis())[0];
+ if (_axis_value < 0)
+ _axis_value += input()->shape().num_dims();
+ assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims());
+
+ auto num_split = static_cast<int32_t>(_outputs.size());
+ auto sizes_data = getTensorData<int32_t>(size_splits());
+
+ assert(size_splits()->shape().num_dims() == 1);
+ assert(size_splits()->shape().num_elements() == num_split);
+ assert(std::accumulate(sizes_data, sizes_data + num_split, 0) ==
+ input()->shape().dim(_axis_value));
+
+ auto output_shape = input()->shape();
+ for (int32_t i = 0; i < num_split; ++i)
+ {
+ output_shape.dim(_axis_value) = sizes_data[i];
+ _outputs[i]->resize(output_shape);
+ }
+}
+
+void SplitV::execute() const
+{
+ tflite::SplitParams params{};
+ params.num_split = _outputs.size();
+ params.axis = _axis_value;
+
+#define TF_LITE_SPLIT(scalar) \
+ { \
+ VectorOfTensors<scalar, false> all_outputs(_outputs); \
+ tflite::optimized_ops::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \
+ all_outputs.shapes(), all_outputs.data()); \
+ }
+
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ TF_LITE_SPLIT(float);
+ break;
+ case DataType::U8:
+ TF_LITE_SPLIT(uint8_t);
+ break;
+ case DataType::S16:
+ TF_LITE_SPLIT(int16_t);
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+#undef TF_LITE_SPLIT
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/SplitV.h b/compiler/luci-interpreter/src/kernels/SplitV.h
new file mode 100644
index 000000000..92f6288fb
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SplitV.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPLIT_V_H
+#define LUCI_INTERPRETER_KERNELS_SPLIT_V_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SplitV : public Kernel
+{
+public:
+ SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis,
+ std::vector<Tensor *> outputs);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *size_splits() const { return _inputs[1]; }
+ const Tensor *axis() const { return _inputs[2]; }
+ Tensor *output(int index) const { return _outputs[index]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ int32_t _axis_value{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPLIT_V_H
diff --git a/compiler/luci-interpreter/src/kernels/SplitV.test.cpp b/compiler/luci-interpreter/src/kernels/SplitV.test.cpp
new file mode 100644
index 000000000..aac0567d7
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SplitV.test.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SplitV.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(int axis, std::initializer_list<int32_t> splits_size,
+ std::initializer_list<int32_t> input_shape, std::initializer_list<T> input_data,
+ std::vector<std::vector<T>> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+
+ auto num_splits = static_cast<int32_t>(splits_size.size());
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor sizes_tensor =
+ makeInputTensor<DataType::S32>({num_splits}, splits_size, memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis}, memory_manager.get());
+
+ std::vector<Tensor> output_tensors;
+ output_tensors.reserve(num_splits);
+ for (int i = 0; i < num_splits; ++i)
+ {
+ output_tensors.emplace_back(makeOutputTensor(element_type));
+ }
+
+ std::vector<Tensor *> output_tensor_ptrs(num_splits);
+ for (int i = 0; i < num_splits; ++i)
+ {
+ output_tensor_ptrs[i] = &output_tensors[i];
+ }
+
+ SplitV kernel(&input_tensor, &sizes_tensor, &axis_tensor, std::move(output_tensor_ptrs));
+ kernel.configure();
+ for (int i = 0; i < num_splits; ++i)
+ {
+ memory_manager->allocate_memory(output_tensors[i]);
+ }
+ kernel.execute();
+
+ for (int i = 0; i < num_splits; ++i)
+ {
+ EXPECT_THAT(extractTensorData<T>(output_tensors[i]),
+ ::testing::ElementsAreArray(output_data[i]));
+ }
+}
+
+template <typename T> class SplitVTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int16_t>;
+TYPED_TEST_CASE(SplitVTest, DataTypes);
+
+TYPED_TEST(SplitVTest, ThreeDimensional)
+{
+ Check<TypeParam>(
+ /*axis=*/0, /*splits_size=*/{1, 2}, {3, 3, 3},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
+ {
+ {1, 2, 3, 4, 5, 6, 7, 8, 9}, //
+ {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27} //
+ });
+ Check<TypeParam>(
+ /*axis=*/1, /*splits_size=*/{1, 2}, {3, 3, 3},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
+ {
+ {1, 2, 3, 10, 11, 12, 19, 20, 21}, //
+ {4, 5, 6, 7, 8, 9, 13, 14, 15, 16, 17, 18, 22, 23, 24, 25, 26, 27} //
+ });
+ Check<TypeParam>(
+ /*axis=*/2, /*splits_size=*/{1, 2}, {3, 3, 3},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
+ {
+ {1, 4, 7, 10, 13, 16, 19, 22, 25}, //
+ {2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23, 24, 26, 27} //
+ });
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
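A quick worked example of what the new SplitV::configure() computes for the first ThreeDimensional check above (an illustration, not part of the patch):

  // input shape {3, 3, 3}, axis 0, size_splits {1, 2}
  // assert: 1 + 2 == input.dim(0) == 3
  // output[0] shape: {1, 3, 3} -> the first  9 elements (1..9)
  // output[1] shape: {2, 3, 3} -> the last  18 elements (10..27)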
diff --git a/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp b/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp
index e40a91e97..96835fbfc 100644
--- a/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Sqrt.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -29,11 +30,15 @@ using namespace testing;
void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
std::initializer_list<float> input_data, std::initializer_list<float> output_data)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Sqrt kernel(&input_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
@@ -58,20 +63,25 @@ TEST(SqrtTest, SimpleSqrt)
TEST(SqrtTest, Input_Output_Type_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S32);
Sqrt kernel(&input_tensor, &output_tensor);
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(AddTest, Invalid_Input_Type_NEG)
+TEST(SqrtTest, Invalid_Input_Type_NEG)
{
- Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1});
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S64);
Sqrt kernel(&input_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
EXPECT_ANY_THROW(kernel.execute());
}
diff --git a/compiler/luci-interpreter/src/kernels/Square.test.cpp b/compiler/luci-interpreter/src/kernels/Square.test.cpp
index 730d6405c..51662dea7 100644
--- a/compiler/luci-interpreter/src/kernels/Square.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Square.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Square.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -29,13 +30,17 @@ using namespace testing;
TEST(SquareTest, Float)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
Shape input_shape{3, 1, 2};
std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data1);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Square kernel(&input_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{1.0, 0.0, 1.0, 121.0, 4.0, 2.0736};
diff --git a/compiler/luci-interpreter/src/kernels/SquaredDifference.test.cpp b/compiler/luci-interpreter/src/kernels/SquaredDifference.test.cpp
index a72eaadfa..2819c01e2 100644
--- a/compiler/luci-interpreter/src/kernels/SquaredDifference.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/SquaredDifference.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/SquaredDifference.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -29,15 +30,20 @@ using namespace testing;
TEST(SquaredDifferenceTest, Float)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
Shape input_shape{3, 1, 2};
std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
- Tensor input_tensor1 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data1);
- Tensor input_tensor2 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data2);
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{4.0, 0.0, 4.0, 1.0, 1.0, 0.0001};
@@ -46,16 +52,21 @@ TEST(SquaredDifferenceTest, Float)
TEST(SquaredDifferenceTest, FloatBroadcast)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
Shape input_shape1{3, 1, 2};
Shape input_shape2{1};
std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
std::vector<float> input_data2{1.0};
- Tensor input_tensor1 = makeInputTensor<DataType::FLOAT32>(input_shape1, input_data1);
- Tensor input_tensor2 = makeInputTensor<DataType::FLOAT32>(input_shape2, input_data2);
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::FLOAT32>(input_shape1, input_data1, memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::FLOAT32>(input_shape2, input_data2, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{0.0, 1.0, 4.0, 100.0, 9.0, 5.9536};
diff --git a/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp b/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp
index 1c81893b9..d3326fe98 100644
--- a/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Squeeze.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -31,8 +32,11 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
std::initializer_list<T> input_data, std::initializer_list<T> output_data,
std::initializer_list<int32_t> squeeze_dims)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
constexpr DataType element_type = getElementType<T>();
- Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(element_type);
SqueezeParams params{};
@@ -40,6 +44,7 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
Squeeze kernel(&input_tensor, &output_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
diff --git a/compiler/luci-interpreter/src/kernels/StridedSlice.cpp b/compiler/luci-interpreter/src/kernels/StridedSlice.cpp
index 37b0dd8c5..c6452cdb0 100644
--- a/compiler/luci-interpreter/src/kernels/StridedSlice.cpp
+++ b/compiler/luci-interpreter/src/kernels/StridedSlice.cpp
@@ -19,7 +19,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/strided_slice.h>
#include <stdexcept>
diff --git a/compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp b/compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp
index 66dffcaf2..399cdebed 100644
--- a/compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/StridedSlice.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -28,6 +29,8 @@ using namespace testing;
TEST(StridedSliceTest, Float)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
Shape input_shape{2, 3, 2};
std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
Shape begin_shape{3};
@@ -36,10 +39,13 @@ TEST(StridedSliceTest, Float)
std::vector<int32_t> end_data{1, 3, 2};
Shape strides_shape{3};
std::vector<int32_t> strides_data{1, 1, 1};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
- Tensor begin_tensor = makeInputTensor<DataType::S32>(begin_shape, begin_data);
- Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data);
- Tensor strides_tensor = makeInputTensor<DataType::S32>(strides_shape, strides_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor begin_tensor =
+ makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get());
+ Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data, memory_manager.get());
+ Tensor strides_tensor =
+ makeInputTensor<DataType::S32>(strides_shape, strides_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
StridedSliceParams params{};
@@ -52,6 +58,7 @@ TEST(StridedSliceTest, Float)
StridedSlice kernel(&input_tensor, &begin_tensor, &end_tensor, &strides_tensor, &output_tensor,
params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<int32_t> output_shape{3, 2};
@@ -62,6 +69,8 @@ TEST(StridedSliceTest, Float)
TEST(StridedSliceTest, Uint8)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
Shape input_shape{2, 3, 2};
std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
Shape begin_shape{3};
@@ -70,10 +79,13 @@ TEST(StridedSliceTest, Uint8)
std::vector<int32_t> end_data{1, 3, 2};
Shape strides_shape{3};
std::vector<int32_t> strides_data{1, 1, 1};
- Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, 1.0f, 0, input_data);
- Tensor begin_tensor = makeInputTensor<DataType::S32>(begin_shape, begin_data);
- Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data);
- Tensor strides_tensor = makeInputTensor<DataType::S32>(strides_shape, strides_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, 1.0f, 0, input_data, memory_manager.get());
+ Tensor begin_tensor =
+ makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get());
+ Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data, memory_manager.get());
+ Tensor strides_tensor =
+ makeInputTensor<DataType::S32>(strides_shape, strides_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, 1.0f, 0);
StridedSliceParams params{};
@@ -86,6 +98,7 @@ TEST(StridedSliceTest, Uint8)
StridedSlice kernel(&input_tensor, &begin_tensor, &end_tensor, &strides_tensor, &output_tensor,
params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<int32_t> output_shape{3, 2};
diff --git a/compiler/luci-interpreter/src/kernels/Sub.cpp b/compiler/luci-interpreter/src/kernels/Sub.cpp
index 3c7588d62..603c62d0f 100644
--- a/compiler/luci-interpreter/src/kernels/Sub.cpp
+++ b/compiler/luci-interpreter/src/kernels/Sub.cpp
@@ -18,7 +18,9 @@
#include "kernels/Sub.h"
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
+#include "PALSub.h"
+
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
#include <stdexcept>
@@ -74,9 +76,9 @@ void Sub::evalFloat() const
}
else
{
- tflite::optimized_ops::Sub(params, getTensorShape(input1()), getTensorData<float>(input1()),
- getTensorShape(input2()), getTensorData<float>(input2()),
- getTensorShape(output()), getTensorData<float>(output()));
+ luci_interpreter_pal::Sub(params, getTensorShape(input1()), getTensorData<float>(input1()),
+ getTensorShape(input2()), getTensorData<float>(input2()),
+ getTensorShape(output()), getTensorData<float>(output()));
}
}
diff --git a/compiler/luci-interpreter/src/kernels/Sub.test.cpp b/compiler/luci-interpreter/src/kernels/Sub.test.cpp
index f560ceb36..c189f4481 100644
--- a/compiler/luci-interpreter/src/kernels/Sub.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Sub.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Sub.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
#include <algorithm>
@@ -33,6 +34,14 @@ using std::vector;
using std::transform;
using std::initializer_list;
+class SubTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
// for quantized Add, the error shouldn't exceed step
float GetTolerance(float min, float max)
{
@@ -40,7 +49,7 @@ float GetTolerance(float min, float max)
return kQuantizedStep;
}
-TEST(SubTest, Uint8)
+TEST_F(SubTest, Uint8)
{
Shape base_shape = {2, 3, 1, 2};
vector<float> base_data = {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
@@ -62,10 +71,10 @@ TEST(SubTest, Uint8)
pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f);
for (size_t i = 0; i < output_data.size(); ++i)
{
- Tensor input1_tensor =
- makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
- Tensor input2_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first,
- quant_param.second, test_data);
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
Tensor output_tensor =
makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
@@ -74,6 +83,7 @@ TEST(SubTest, Uint8)
Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(dequantizeTensorData(output_tensor),
@@ -90,10 +100,10 @@ TEST(SubTest, Uint8)
// Re-run with exchanged inputs.
for (size_t i = 0; i < output_data.size(); ++i)
{
- Tensor input1_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first,
- quant_param.second, test_data);
- Tensor input2_tensor =
- makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
Tensor output_tensor =
makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
@@ -102,6 +112,7 @@ TEST(SubTest, Uint8)
Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(dequantizeTensorData(output_tensor),
@@ -110,7 +121,7 @@ TEST(SubTest, Uint8)
}
}
-TEST(SubTest, Float)
+TEST_F(SubTest, Float)
{
Shape base_shape = {2, 3, 1, 2};
vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
@@ -130,8 +141,10 @@ TEST(SubTest, Float)
vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
for (size_t i = 0; i < test_shapes.size(); ++i)
{
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data);
- Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data);
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
SubParams params{};
@@ -139,6 +152,7 @@ TEST(SubTest, Float)
Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
@@ -148,10 +162,10 @@ TEST(SubTest, Float)
}
}
-TEST(SubTest, Input_Output_Type_NEG)
+TEST_F(SubTest, Input_Output_Type_NEG)
{
- Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
- Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2});
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
SubParams params{};
@@ -161,10 +175,10 @@ TEST(SubTest, Input_Output_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST(SubTest, Invalid_Input_Type_NEG)
+TEST_F(SubTest, Invalid_Input_Type_NEG)
{
- Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1});
- Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2});
+ Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S64);
SubParams params{};
@@ -172,6 +186,7 @@ TEST(SubTest, Invalid_Input_Type_NEG)
Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
EXPECT_ANY_THROW(kernel.execute());
}
diff --git a/compiler/luci-interpreter/src/kernels/Tanh.cpp b/compiler/luci-interpreter/src/kernels/Tanh.cpp
index 1c3d1281d..c4fa16912 100644
--- a/compiler/luci-interpreter/src/kernels/Tanh.cpp
+++ b/compiler/luci-interpreter/src/kernels/Tanh.cpp
@@ -18,7 +18,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/tanh.h>
namespace luci_interpreter
{
diff --git a/compiler/luci-interpreter/src/kernels/Tanh.test.cpp b/compiler/luci-interpreter/src/kernels/Tanh.test.cpp
index ef727d6eb..bfae479a9 100644
--- a/compiler/luci-interpreter/src/kernels/Tanh.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Tanh.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Tanh.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -27,18 +28,28 @@ namespace
using namespace testing;
-TEST(TanhTest, Float)
+class TanhTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(TanhTest, Float)
{
Shape input_shape{1, 2, 4, 1};
std::vector<float> input_data{
0, -6, 2, 4, //
3, -2, 10, 1, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Tanh kernel(&input_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{
@@ -48,7 +59,7 @@ TEST(TanhTest, Float)
EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
}
-TEST(TanhTest, Uint8)
+TEST_F(TanhTest, Uint8)
{
float kMin = -1;
float kMax = 127.f / 128.f;
@@ -69,13 +80,15 @@ TEST(TanhTest, Uint8)
0, -6, 2, 4, //
-4, -2, 8, 1, //
};
- Tensor input_tensor = makeInputTensor<DataType::U8>({2, 6, 4, 1}, input_quant_param.first,
- input_quant_param.second, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({2, 6, 4, 1}, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
Tensor output_tensor =
makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
Tanh kernel(&input_tensor, &output_tensor);
kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
kernel.execute();
std::vector<float> ref_output_data{
@@ -97,7 +110,7 @@ TEST(TanhTest, Uint8)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
}
-TEST(TanhTest, InputTypeInvalid_NEG)
+TEST_F(TanhTest, InputTypeInvalid_NEG)
{
std::vector<int64_t> input_data{
0, -6, 2, 4, //
@@ -113,14 +126,16 @@ TEST(TanhTest, InputTypeInvalid_NEG)
0, -6, 2, 4, //
-4, -2, 8, 1, //
};
- Tensor input_tensor = makeInputTensor<DataType::S64>({2, 6, 4, 1}, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S64>({2, 6, 4, 1}, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
Tanh kernel(&input_tensor, &output_tensor);
+ _memory_manager->allocate_memory(output_tensor);
EXPECT_ANY_THROW(kernel.execute());
}
-TEST(TanhTest, InputOutputMismatch_NEG)
+TEST_F(TanhTest, InputOutputMismatch_NEG)
{
std::vector<float> input_data{
0, -6, 2, 4, //
@@ -136,7 +151,8 @@ TEST(TanhTest, InputOutputMismatch_NEG)
0, -6, 2, 4, //
-4, -2, 8, 1, //
};
- Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 6, 4, 1}, input_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 6, 4, 1}, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8);
Tanh kernel(&input_tensor, &output_tensor);
diff --git a/compiler/luci-interpreter/src/kernels/TestUtils.cpp b/compiler/luci-interpreter/src/kernels/TestUtils.cpp
index 831dc4247..4d983adda 100644
--- a/compiler/luci-interpreter/src/kernels/TestUtils.cpp
+++ b/compiler/luci-interpreter/src/kernels/TestUtils.cpp
@@ -43,6 +43,11 @@ std::vector<float> dequantizeTensorData(const Tensor &tensor)
std::vector<uint8_t> data = extractTensorData<uint8_t>(tensor);
return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point());
}
+ if (tensor.element_type() == DataType::S8)
+ {
+ std::vector<int8_t> data = extractTensorData<int8_t>(tensor);
+ return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point());
+ }
else if (tensor.element_type() == DataType::S16)
{
// S16 quantization is symmetric, so zero point should be zero.
diff --git a/compiler/luci-interpreter/src/kernels/TestUtils.h b/compiler/luci-interpreter/src/kernels/TestUtils.h
index c4c73d546..1f5a0c308 100644
--- a/compiler/luci-interpreter/src/kernels/TestUtils.h
+++ b/compiler/luci-interpreter/src/kernels/TestUtils.h
@@ -19,6 +19,7 @@
#define LUCI_INTERPRETER_KERNELS_TESTUTILS_H
#include "luci_interpreter/core/Tensor.h"
+#include "luci_interpreter/MemoryManager.h"
#include <type_traits>
@@ -36,9 +37,11 @@ template <typename T>
std::vector<T> quantize(const float *data, size_t num_elements, float scale, int32_t zero_point);
template <DataType DT>
-Tensor makeInputTensor(const Shape &shape, const std::vector<typename DataTypeImpl<DT>::Type> &data)
+Tensor makeInputTensor(const Shape &shape, const std::vector<typename DataTypeImpl<DT>::Type> &data,
+ IMemoryManager *memory_manager)
{
Tensor tensor(DT, shape, {}, "");
+ memory_manager->allocate_memory(tensor);
tensor.writeData(data.data(), data.size() * sizeof(typename DataTypeImpl<DT>::Type));
return tensor;
}
@@ -50,16 +53,18 @@ Tensor makeInputTensor(const Shape &shape, const std::vector<typename DataTypeIm
* @param scale scale of quantized number
* @param zero_point zero point of quantized number, should be 0 for signed datatypes
* @param data floating point data for quantization
+ * @param memory_manager memory manager used to allocate memory for the tensor
* @return created tensor
*/
template <DataType DT>
Tensor makeInputTensor(const Shape &shape, float scale, int32_t zero_point,
- const std::vector<float> &data)
+ const std::vector<float> &data, IMemoryManager *memory_manager)
{
using NativeT = typename DataTypeImpl<DT>::Type;
Tensor tensor(DT, shape, {{scale}, {zero_point}}, "");
std::vector<NativeT> quantized_data =
quantize<NativeT>(data.data(), data.size(), scale, zero_point);
+ memory_manager->allocate_memory(tensor);
tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT));
return tensor;
}
@@ -72,12 +77,13 @@ Tensor makeInputTensor(const Shape &shape, float scale, int32_t zero_point,
* @param zero_points zero points of quantized number, should be 0 for signed datatypes
 * @param quantized_dimension dimension to apply quantization along. Usually channels/output channels
* @param data floating point data for quantization
+ * @param memory_manager memory manager used to allocate memory for the tensor
* @return created tensor
*/
template <DataType DT>
Tensor makeInputTensor(const Shape &shape, const std::vector<float> &scales,
const std::vector<int32_t> &zero_points, int quantized_dimension,
- const std::vector<float> &data)
+ const std::vector<float> &data, IMemoryManager *memory_manager)
{
using NativeT = typename DataTypeImpl<DT>::Type;
assert(quantized_dimension < shape.num_dims());
@@ -113,6 +119,7 @@ Tensor makeInputTensor(const Shape &shape, const std::vector<float> &scales,
part_quantized_data.end());
}
assert(quantized_data.size() == shape.num_elements());
+ memory_manager->allocate_memory(tensor);
tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT));
return tensor;
}
@@ -127,12 +134,26 @@ template <typename T> constexpr DataType getElementType()
{
if (std::is_same<T, float>::value)
return DataType::FLOAT32;
+ if (std::is_same<T, double>::value)
+ return DataType::FLOAT64;
if (std::is_same<T, uint8_t>::value)
return DataType::U8;
+ if (std::is_same<T, uint16_t>::value)
+ return DataType::U16;
+ if (std::is_same<T, uint32_t>::value)
+ return DataType::U32;
+ if (std::is_same<T, uint64_t>::value)
+ return DataType::U64;
+ if (std::is_same<T, int8_t>::value)
+ return DataType::S8;
+ if (std::is_same<T, int16_t>::value)
+ return DataType::S16;
if (std::is_same<T, int32_t>::value)
return DataType::S32;
if (std::is_same<T, int64_t>::value)
return DataType::S64;
+ if (std::is_same<T, bool>::value)
+ return DataType::BOOL;
return DataType::Unknown;
}
@@ -156,8 +177,6 @@ std::vector<T> quantize(const float *data, size_t num_elements, float scale, int
float q_min{}, q_max{};
if (std::is_signed<T>::value)
{
- // For now, assume that signed type implies signed symmetric quantization.
- assert(zero_point == 0);
q_min = -std::numeric_limits<T>::max();
q_max = std::numeric_limits<T>::max();
}
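
Taken together, the TestUtils changes above mean every kernel test now threads an IMemoryManager through makeInputTensor and allocates output memory itself after configure(). A minimal sketch of that pattern, assuming the Tanh kernel and the helpers shown in this diff (TestMemoryManager, makeInputTensor, makeOutputTensor); the function name and data are placeholders:

#include "kernels/Tanh.h"
#include "kernels/TestUtils.h"
#include "luci_interpreter/TestMemoryManager.h"

#include <memory>

using namespace luci_interpreter;
using namespace luci_interpreter::kernels;
using namespace luci_interpreter::kernels::testing;

void tanh_test_sketch()
{
  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();

  // makeInputTensor now allocates the input buffer through the memory manager before writing data.
  Tensor input_tensor =
    makeInputTensor<DataType::FLOAT32>({1, 4}, {0.0f, 1.0f, -1.0f, 2.0f}, memory_manager.get());
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);

  Tanh kernel(&input_tensor, &output_tensor);
  kernel.configure();
  // Output memory is no longer allocated implicitly; the test allocates it after configure().
  memory_manager->allocate_memory(output_tensor);
  kernel.execute();
}
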
diff --git a/compiler/luci-interpreter/src/kernels/Transpose.cpp b/compiler/luci-interpreter/src/kernels/Transpose.cpp
index c1a11cdb0..802d87295 100644
--- a/compiler/luci-interpreter/src/kernels/Transpose.cpp
+++ b/compiler/luci-interpreter/src/kernels/Transpose.cpp
@@ -18,7 +18,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/transpose.h>
#include <stdexcept>
@@ -37,7 +37,7 @@ void Transpose::configure()
{
// Transpose op only supports 1D-4D input arrays.
int dims = input()->shape().num_dims();
- const int *perm_data = getTensorData<int32_t>(perm());
+ const int32_t *perm_data = getTensorData<int32_t>(perm());
assert(input()->shape().num_dims() <= 4);
assert(input()->element_type() == output()->element_type());
@@ -58,8 +58,8 @@ void Transpose::configure()
void Transpose::execute() const
{
tflite::TransposeParams params{};
- const int *perm_data = getTensorData<int32_t>(perm());
- const int size = perm()->shape().dim(0);
+ const int32_t *perm_data = getTensorData<int32_t>(perm());
+ const int32_t size = perm()->shape().dim(0);
params.perm_count = size;
for (int i = 0; i < size; i++)
params.perm[i] = perm_data[i];
diff --git a/compiler/luci-interpreter/src/kernels/Transpose.test.cpp b/compiler/luci-interpreter/src/kernels/Transpose.test.cpp
index f0a915c35..107179910 100644
--- a/compiler/luci-interpreter/src/kernels/Transpose.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Transpose.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/Transpose.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -31,13 +32,16 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data,
std::initializer_list<int32_t> perm_data, std::initializer_list<T> output_data)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
constexpr DataType element_type = getElementType<T>();
- Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
- Tensor perm_tensor = makeInputTensor<DataType::S32>(perm_shape, perm_data);
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor perm_tensor = makeInputTensor<DataType::S32>(perm_shape, perm_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(element_type);
Transpose kernel(&input_tensor, &perm_tensor, &output_tensor);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
kernel.execute();
EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.cpp b/compiler/luci-interpreter/src/kernels/TransposeConv.cpp
index 0c70756b2..1b5f9d941 100644
--- a/compiler/luci-interpreter/src/kernels/TransposeConv.cpp
+++ b/compiler/luci-interpreter/src/kernels/TransposeConv.cpp
@@ -19,7 +19,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/transpose_conv.h>
#include <stdexcept>
@@ -30,8 +30,10 @@ namespace kernels
{
TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
- const Tensor *bias, Tensor *output, const TransposeConvParams &params)
- : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias}, {output}, params)
+ const Tensor *bias, Tensor *output, Tensor *scratch_tensor,
+ const TransposeConvParams &params)
+ : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias},
+ {output, scratch_tensor}, params)
{
}
@@ -74,15 +76,18 @@ void TransposeConv::configure()
if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
{
- DataType scratch_data_type =
- input()->element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
- _scratch_tensor =
- std::make_unique<Tensor>(scratch_data_type, output()->shape(), AffineQuantization{}, "");
+ auto scratch_tensor = getOutputTensors()[1];
+ scratch_tensor->resize(output()->shape());
const std::vector<double> real_multipliers =
getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
_quant_multipliers = quantizeMultipliers(real_multipliers);
}
+ else
+ {
+ auto scratch_tensor = getOutputTensors()[1];
+ scratch_tensor->set_allocatable(false);
+ }
}
void TransposeConv::execute() const
@@ -111,8 +116,6 @@ void TransposeConv::execute() const
default:
throw std::runtime_error("Unsupported type.");
}
- if (!!_scratch_tensor)
- _scratch_tensor->deallocate();
}
void TransposeConv::evalFloat() const
@@ -148,13 +151,15 @@ void TransposeConv::evalQuantized() const
op_params.quantized_activation_min = std::numeric_limits<uint8_t>::min();
op_params.quantized_activation_max = std::numeric_limits<uint8_t>::max();
+ auto scratch_tensor = getOutputTensors()[1];
+
tflite::reference_ops::TransposeConv(op_params, //
getTensorShape(input()), getTensorData<uint8>(input()), //
getTensorShape(filter()), getTensorData<uint8>(filter()), //
getTensorShape(bias()), getTensorData<int32_t>(bias()), //
getTensorShape(output()), getTensorData<uint8>(output()), //
tflite::RuntimeShape(), nullptr, //
- getTensorData<int32_t>(_scratch_tensor.get()));
+ getTensorData<int32_t>(scratch_tensor));
}
void TransposeConv::evalQuantizedPerChannel() const
@@ -163,7 +168,9 @@ void TransposeConv::evalQuantizedPerChannel() const
const auto *filter_data = getTensorData<uint8_t>(filter());
const auto *bias_data = getTensorData<int32_t>(bias());
auto *output_data = getTensorData<uint8_t>(output());
- auto *scratch_data = getTensorData<int32_t>(_scratch_tensor.get());
+
+ auto scratch_tensor = getOutputTensors()[1];
+ auto *scratch_data = getTensorData<int32_t>(scratch_tensor);
const Shape &input_shape = input()->shape();
const Shape &filter_shape = filter()->shape();
@@ -186,7 +193,7 @@ void TransposeConv::evalQuantizedPerChannel() const
int32_t activation_max{};
calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
- std::memset(scratch_data, 0, _scratch_tensor->shape().num_elements() * sizeof(int32_t));
+ std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int32_t));
BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
for (int32_t batch = 0; batch < batches; ++batch)
@@ -255,7 +262,9 @@ void TransposeConv::evalQuantizedS16() const
const auto *filter_data = getTensorData<int16_t>(filter());
const auto *bias_data = getTensorData<int64_t>(bias());
auto *output_data = getTensorData<int16_t>(output());
- auto *scratch_data = getTensorData<int64_t>(_scratch_tensor.get());
+
+ auto scratch_tensor = getOutputTensors()[1];
+ auto *scratch_data = getTensorData<int64_t>(scratch_tensor);
const Shape &input_shape = input()->shape();
const Shape &filter_shape = filter()->shape();
@@ -278,7 +287,7 @@ void TransposeConv::evalQuantizedS16() const
int32_t activation_max{};
calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
- std::memset(scratch_data, 0, _scratch_tensor->shape().num_elements() * sizeof(int64_t));
+ std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int64_t));
BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
for (int32_t batch = 0; batch < batches; ++batch)
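
With the scratch buffer now modelled as a second output tensor instead of a kernel-owned std::unique_ptr, callers create and allocate it themselves. A minimal sketch of the calling pattern, assuming the same rule for the scratch data type that the updated tests below use (S64 for S16 inputs, S32 otherwise); tensor names and the surrounding setup are placeholders:

DataType scratch_data_type =
  input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");

TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
                     &output_tensor, &scratch_tensor, params);
kernel.configure();
memory_manager->allocate_memory(output_tensor);
memory_manager->allocate_memory(scratch_tensor);
kernel.execute();
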
diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.h b/compiler/luci-interpreter/src/kernels/TransposeConv.h
index 2e0beece8..cea0cf3c7 100644
--- a/compiler/luci-interpreter/src/kernels/TransposeConv.h
+++ b/compiler/luci-interpreter/src/kernels/TransposeConv.h
@@ -31,7 +31,8 @@ class TransposeConv : public KernelWithParams<TransposeConvParams>
{
public:
TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
- const Tensor *bias, Tensor *output, const TransposeConvParams &params);
+ const Tensor *bias, Tensor *output, Tensor *scratch_tensor,
+ const TransposeConvParams &params);
~TransposeConv();
@@ -51,8 +52,6 @@ private:
void evalQuantizedS16() const;
private:
- std::unique_ptr<Tensor> _scratch_tensor;
-
int32_t _padding_height{};
int32_t _padding_width{};
// The scaling factor from input to output (aka the 'real multiplier') can
diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp b/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp
index 9bcb015c1..4856e1b87 100644
--- a/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp
@@ -16,6 +16,7 @@
#include "kernels/TransposeConv.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -35,11 +36,18 @@ void Check(std::initializer_list<int32_t> output_shape_shape,
std::initializer_list<T> output_data, luci::Padding padding, int32_t stride_height,
int32_t stride_width)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
constexpr DataType element_type = getElementType<T>();
Tensor output_shape_tensor =
- makeInputTensor<DataType::S32>(output_shape_shape, output_shape_data);
- Tensor weight_tensor = makeInputTensor<element_type>(weight_shape, weight_data);
- Tensor input_data_tensor = makeInputTensor<element_type>(input_shape, input_data);
+ makeInputTensor<DataType::S32>(output_shape_shape, output_shape_data, memory_manager.get());
+ Tensor weight_tensor =
+ makeInputTensor<element_type>(weight_shape, weight_data, memory_manager.get());
+ Tensor input_data_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+
+ DataType scratch_data_type = element_type == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(element_type);
TransposeConvParams params{};
@@ -49,17 +57,22 @@ void Check(std::initializer_list<int32_t> output_shape_shape,
if (bias_data.size() != 0)
{
- Tensor bias_tensor = makeInputTensor<getElementType<B>()>(bias_shape, bias_data);
+ Tensor bias_tensor =
+ makeInputTensor<getElementType<B>()>(bias_shape, bias_data, memory_manager.get());
TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, &bias_tensor,
- &output_tensor, params);
+ &output_tensor, &scratch_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
kernel.execute();
}
else
{
TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, nullptr,
- &output_tensor, params);
+ &output_tensor, &scratch_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
kernel.execute();
}
EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
@@ -114,6 +127,8 @@ TEST(TransposeConvTest, SimpleBiasTest)
TEST(TransposeConvTest, UInt8)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
std::vector<float> input_data{1, 2, 3, 4};
std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
std::vector<float> bias_data{3, 4};
@@ -131,23 +146,30 @@ TEST(TransposeConvTest, UInt8)
auto filter_quant = quantizationParams<uint8_t>(-24.0, 39.75); // s = 1 / 4, zp = 96
auto output_quant = quantizationParams<uint8_t>(-64.0, 191.0); // s = 1, zp = 64
- Tensor input_tensor =
- makeInputTensor<DataType::U8>({1, 2, 2, 1}, input_quant.first, input_quant.second, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::U8>({2, 3, 3, 1}, filter_quant.first,
- filter_quant.second, filter_data);
- Tensor bias_tensor =
- makeInputTensor<DataType::S32>({2}, input_quant.first * filter_quant.first, 0, bias_data);
- Tensor output_shape_tensor = makeInputTensor<DataType::S32>({4}, output_shape_data);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 2, 1}, input_quant.first, input_quant.second, input_data, memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::U8>(
+ {2, 3, 3, 1}, filter_quant.first, filter_quant.second, filter_data, memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>({2}, input_quant.first * filter_quant.first,
+ 0, bias_data, memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second);
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
TransposeConvParams params{};
params.padding = Padding::VALID;
params.stride_height = 2;
params.stride_width = 2;
TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
- &output_tensor, params);
+ &output_tensor, &scratch_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
@@ -156,6 +178,8 @@ TEST(TransposeConvTest, UInt8)
TEST(TransposeConvTest, UInt8_CWQ)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
const int32_t output_channels = 2;
std::vector<float> input_data{1, 2, 3, 4};
std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
@@ -190,23 +214,30 @@ TEST(TransposeConvTest, UInt8_CWQ)
bias_scales.push_back(filter_quant_params[i].first * input_quant.first);
std::vector<int32_t> zerop(output_channels, 0);
- Tensor input_tensor =
- makeInputTensor<DataType::U8>({1, 2, 2, 1}, input_quant.first, input_quant.second, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::U8>({output_channels, 3, 3, 1}, filter_scales,
- filter_zerops, 0, filter_data);
- Tensor bias_tensor =
- makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, bias_data);
- Tensor output_shape_tensor = makeInputTensor<DataType::S32>({4}, output_shape_data);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 2, 1}, input_quant.first, input_quant.second, input_data, memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::U8>(
+ {output_channels, 3, 3, 1}, filter_scales, filter_zerops, 0, filter_data, memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
+ bias_data, memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second);
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
TransposeConvParams params{};
params.padding = Padding::VALID;
params.stride_height = 2;
params.stride_width = 2;
TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
- &output_tensor, params);
+ &output_tensor, &scratch_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
@@ -215,6 +246,8 @@ TEST(TransposeConvTest, UInt8_CWQ)
TEST(TransposeConvTest, SInt16)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
std::vector<float> input_data{1, 2, 3, 4};
std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
std::vector<float> bias_data{3, 4};
@@ -227,20 +260,30 @@ TEST(TransposeConvTest, SInt16)
42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
};
- Tensor input_tensor = makeInputTensor<DataType::S16>({1, 2, 2, 1}, 0.25, 0, input_data);
- Tensor filter_tensor = makeInputTensor<DataType::S16>({2, 3, 3, 1}, 0.2, 0, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::S64>({2}, 0.25 * 0.2, 0, bias_data);
- Tensor output_shape_tensor = makeInputTensor<DataType::S32>({4}, output_shape_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 2, 2, 1}, 0.25, 0, input_data, memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::S16>({2, 3, 3, 1}, 0.2, 0, filter_data, memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::S64>({2}, 0.25 * 0.2, 0, bias_data, memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
TransposeConvParams params{};
params.padding = Padding::VALID;
params.stride_height = 2;
params.stride_width = 2;
TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
- &output_tensor, params);
+ &output_tensor, &scratch_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
@@ -249,6 +292,8 @@ TEST(TransposeConvTest, SInt16)
TEST(TransposeConvTest, SInt16_CWQ_weights)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
const int output_channels = 2;
const Shape input_shape{1, 2, 2, 1};
const Shape filter_shape{output_channels, 3, 3, 1};
@@ -273,21 +318,30 @@ TEST(TransposeConvTest, SInt16_CWQ_weights)
std::vector<float> bias_scales{filter_scales[0] * input_scale, filter_scales[1] * input_scale};
const std::vector<int32_t> zerop(2, 0);
- Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data);
- Tensor filter_tensor =
- makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0, filter_data);
- Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data);
- Tensor output_shape_tensor = makeInputTensor<DataType::S32>({4}, output_shape_data);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0,
+ filter_data, memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
+ memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0);
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
TransposeConvParams params{};
params.padding = Padding::VALID;
params.stride_height = 2;
params.stride_width = 2;
TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
- &output_tensor, params);
+ &output_tensor, &scratch_tensor, params);
kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
diff --git a/compiler/luci-interpreter/src/kernels/Unpack.test.cpp b/compiler/luci-interpreter/src/kernels/Unpack.test.cpp
index 6d611e12e..4f22c9f30 100644
--- a/compiler/luci-interpreter/src/kernels/Unpack.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Unpack.test.cpp
@@ -17,6 +17,7 @@
#include "kernels/Unpack.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -32,10 +33,12 @@ void Check(int axis, Shape input_shape, std::initializer_list<T> input_data,
const std::vector<std::initializer_list<int32_t>> &exp_output_shape,
std::vector<std::initializer_list<T>> exp_output_data)
{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
constexpr DataType element_type = getElementType<T>();
const int num_outputs = input_shape.dim(axis < 0 ? axis + input_shape.num_dims() : axis);
- Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
std::vector<Tensor> output_tensors;
output_tensors.reserve(num_outputs);
for (int i = 0; i < num_outputs; ++i)
@@ -54,6 +57,10 @@ void Check(int axis, Shape input_shape, std::initializer_list<T> input_data,
Unpack kernel(&input_tensor, std::move(output_tensor_ptrs), params);
kernel.configure();
+ for (int i = 0; i < num_outputs; i++)
+ {
+ memory_manager->allocate_memory(output_tensors[i]);
+ }
kernel.execute();
for (int i = 0; i < num_outputs; ++i)
diff --git a/compiler/luci-interpreter/src/kernels/Utils.cpp b/compiler/luci-interpreter/src/kernels/Utils.cpp
index 83faa7d7f..6e83e37f6 100644
--- a/compiler/luci-interpreter/src/kernels/Utils.cpp
+++ b/compiler/luci-interpreter/src/kernels/Utils.cpp
@@ -91,7 +91,7 @@ static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t
void calculateActivationRangeQuantized(Activation activation, const Tensor *output,
int32_t *activation_min, int32_t *activation_max)
{
- // For now, assume that signed type implies signed symmetric quantization.
+ assert(output->zero_points().size() == 1);
int32_t qmin{};
int32_t qmax{};
switch (output->element_type())
@@ -101,11 +101,11 @@ void calculateActivationRangeQuantized(Activation activation, const Tensor *outp
qmax = std::numeric_limits<uint8_t>::max();
break;
case DataType::S8:
- assert(output->zero_point() == 0);
qmin = -std::numeric_limits<int8_t>::max();
qmax = std::numeric_limits<int8_t>::max();
break;
case DataType::S16:
+ // For now, assume that signed int16 type implies signed symmetric quantization.
assert(output->zero_point() == 0);
qmin = -std::numeric_limits<int16_t>::max();
qmax = std::numeric_limits<int16_t>::max();
diff --git a/compiler/luci-interpreter/src/kernels/While.cpp b/compiler/luci-interpreter/src/kernels/While.cpp
index d4676467d..153bd1a99 100644
--- a/compiler/luci-interpreter/src/kernels/While.cpp
+++ b/compiler/luci-interpreter/src/kernels/While.cpp
@@ -49,6 +49,13 @@ void copy(const std::vector<Tensor *> &src, const std::vector<Tensor *> &dst)
copy(const_src, dst);
}
+// TODO: Think about how to allocate memory for outputs in the main graph
+void configureTensorsAllocations(const std::vector<Tensor *> &tensors, RuntimeGraph *run_graph)
+{
+ for (auto tensor : tensors)
+ run_graph->configureAllocations(tensor);
+}
+
} // namespace
While::While(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs,
@@ -78,11 +85,15 @@ void While::execute() const
const auto &cond_inputs = _cond_graph->getInputTensors();
const auto &cond_outputs = _cond_graph->getOutputTensors();
+ configureTensorsAllocations(cond_inputs, _cond_graph);
+
copy(getInputTensors(), cond_inputs);
const auto &body_inputs = _body_graph->getInputTensors();
const auto &body_outputs = _body_graph->getOutputTensors();
+ configureTensorsAllocations(body_inputs, _body_graph);
+
while (true)
{
_cond_graph->execute();
diff --git a/compiler/luci-interpreter/src/kernels/While.test.cpp b/compiler/luci-interpreter/src/kernels/While.test.cpp
index a066d2c12..cb8f89130 100644
--- a/compiler/luci-interpreter/src/kernels/While.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/While.test.cpp
@@ -20,6 +20,7 @@
#include "kernels/Less.h"
#include "kernels/While.h"
#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
namespace luci_interpreter
{
@@ -30,14 +31,18 @@ namespace
using namespace testing;
-RuntimeGraph *buildCondSubgraph(RuntimeModule *module, DataType dtype, Tensor *input_cond)
+RuntimeGraph *buildCondSubgraph(RuntimeModule *module, DataType dtype, Tensor *input_cond,
+ IMemoryManager *memory_manager)
{
- RuntimeGraph *graph = module->addGraph();
+ RuntimeGraph *graph = module->addGraph(memory_manager);
Tensor *input =
graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, ""));
Tensor *output =
graph->addTensor(std::make_unique<Tensor>(DataType::BOOL, Shape{}, AffineQuantization{}, ""));
+ memory_manager->allocate_memory(*input);
+ memory_manager->allocate_memory(*output);
+
graph->setInputTensors({input});
graph->setOutputTensors({output});
@@ -46,14 +51,18 @@ RuntimeGraph *buildCondSubgraph(RuntimeModule *module, DataType dtype, Tensor *i
return graph;
}
-RuntimeGraph *buildBodySubgraph(RuntimeModule *module, DataType dtype, Tensor *input_add)
+RuntimeGraph *buildBodySubgraph(RuntimeModule *module, DataType dtype, Tensor *input_add,
+ IMemoryManager *memory_manager)
{
- RuntimeGraph *graph = module->addGraph();
+ RuntimeGraph *graph = module->addGraph(memory_manager);
Tensor *input =
graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, ""));
Tensor *output =
graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, ""));
+ memory_manager->allocate_memory(*input);
+ memory_manager->allocate_memory(*output);
+
graph->setInputTensors({input});
graph->setOutputTensors({output});
@@ -66,18 +75,22 @@ RuntimeGraph *buildBodySubgraph(RuntimeModule *module, DataType dtype, Tensor *i
TEST(WhileTest, FloatLoop10)
{
- Tensor input = makeInputTensor<DataType::FLOAT32>({1}, {1});
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input = makeInputTensor<DataType::FLOAT32>({1}, {1}, memory_manager.get());
Tensor output = makeOutputTensor(DataType::FLOAT32);
- Tensor input_cond = makeInputTensor<DataType::FLOAT32>({1}, {10});
- Tensor input_add = makeInputTensor<DataType::FLOAT32>({1}, {1});
+ Tensor input_cond = makeInputTensor<DataType::FLOAT32>({1}, {10}, memory_manager.get());
+ Tensor input_add = makeInputTensor<DataType::FLOAT32>({1}, {1}, memory_manager.get());
RuntimeModule module(nullptr);
- RuntimeGraph *cond_graph = buildCondSubgraph(&module, DataType::FLOAT32, &input_cond);
- RuntimeGraph *body_graph = buildBodySubgraph(&module, DataType::FLOAT32, &input_add);
+ RuntimeGraph *cond_graph =
+ buildCondSubgraph(&module, DataType::FLOAT32, &input_cond, memory_manager.get());
+ RuntimeGraph *body_graph =
+ buildBodySubgraph(&module, DataType::FLOAT32, &input_add, memory_manager.get());
While kernel({&input}, {&output}, cond_graph, body_graph);
kernel.configure();
+ memory_manager->allocate_memory(output);
kernel.execute();
EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({10}));
diff --git a/compiler/luci-interpreter/src/loader/CMakeLists.txt b/compiler/luci-interpreter/src/loader/CMakeLists.txt
index 782f46761..974283a2f 100644
--- a/compiler/luci-interpreter/src/loader/CMakeLists.txt
+++ b/compiler/luci-interpreter/src/loader/CMakeLists.txt
@@ -7,14 +7,23 @@ set(SOURCES
KernelBuilder.cpp
ModuleLoader.h
ModuleLoader.cpp
- RuntimeToIR.h)
+ RuntimeToIR.h
+ nodes/Builders.h)
-add_library(luci_interpreter_loader STATIC ${SOURCES})
-set_target_properties(luci_interpreter_loader PROPERTIES POSITION_INDEPENDENT_CODE ON)
-target_include_directories(luci_interpreter_loader PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
-target_link_libraries(luci_interpreter_loader
- PUBLIC luci_lang luci_interpreter_core
- PRIVATE luci_interpreter_kernels nncc_common)
+# include kernel-specific builders
+macro(REGISTER_KERNEL NODE)
+ list(APPEND SOURCES "nodes/${NODE}.cpp")
+endmacro(REGISTER_KERNEL)
+include(${KERNEL_REGISTER_FILE})
+
+add_library(${LUCI_INTERPRETER_LOADER} STATIC ${SOURCES})
+set_target_properties(${LUCI_INTERPRETER_LOADER} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_PAL_DIR}")
+target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
+
+target_link_libraries(${LUCI_INTERPRETER_LOADER}
+ PUBLIC luci_lang ${LUCI_INTERPRETER_CORE}
+ PRIVATE ${LUCI_INTERPRETER_KERNELS} nncc_common)
if(NOT ENABLE_TEST)
return()
@@ -24,5 +33,5 @@ nnas_find_package(GTest REQUIRED)
set(TEST_SOURCES KernelBuilder.test.cpp)
-GTest_AddTest(luci_interpreter_loader_test ${TEST_SOURCES})
-target_link_libraries(luci_interpreter_loader_test luci_interpreter_loader)
+GTest_AddTest(${LUCI_INTERPRETER_LOADER}_test ${TEST_SOURCES})
+target_link_libraries(${LUCI_INTERPRETER_LOADER}_test ${LUCI_INTERPRETER_LOADER})
diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-interpreter/src/loader/GraphLoader.cpp
index ee45ad747..b55e7c504 100644
--- a/compiler/luci-interpreter/src/loader/GraphLoader.cpp
+++ b/compiler/luci-interpreter/src/loader/GraphLoader.cpp
@@ -57,6 +57,8 @@ const void *getNodeData(const luci::CircleConst *node, size_t *data_size)
return getNodeDataImpl<DataType::U8>(node, data_size);
case DataType::FLOAT32:
return getNodeDataImpl<DataType::FLOAT32>(node, data_size);
+ case DataType::S8:
+ return getNodeDataImpl<DataType::S8>(node, data_size);
case DataType::S16:
return getNodeDataImpl<DataType::S16>(node, data_size);
case DataType::S32:
@@ -82,6 +84,7 @@ bool isExecutableNode(const luci::CircleNode *node)
// The following nodes denote outputs of multiple-output nodes.
case luci::CircleOpcode::CIRCLEIFOUT:
case luci::CircleOpcode::CIRCLESPLITOUT:
+ case luci::CircleOpcode::CIRCLESPLITVOUT:
case luci::CircleOpcode::CIRCLEUNPACKOUT:
case luci::CircleOpcode::CIRCLEWHILEOUT:
return false;
@@ -112,9 +115,10 @@ bool isTensorProducingNode(const luci::CircleNode *node)
GraphLoader::GraphLoader(
const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
- std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor, IMemoryManager *memory_manager)
: _graph(graph), _runtime_graph(runtime_graph), _runtime_to_ir(runtime_to_ir),
- _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor)
+ _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor),
+ _memory_manager(memory_manager)
{
}
@@ -156,7 +160,10 @@ void GraphLoader::loadTensors()
size_t data_size{};
const void *const_data = getNodeData(const_node, &data_size);
if (const_data != nullptr)
+ {
+ _memory_manager->allocate_memory(*tensor);
tensor->writeData(const_data, data_size);
+ }
}
_node_to_tensor.emplace(node, tensor.get());
@@ -173,6 +180,7 @@ void GraphLoader::initInputOutputTensors() const
for (size_t i = 0; i < input_nodes.size(); ++i)
{
input_tensors[i] = _node_to_tensor.at(input_nodes[i]);
+ _memory_manager->allocate_memory(*input_tensors[i]);
}
_runtime_graph->setInputTensors(input_tensors);
diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.h b/compiler/luci-interpreter/src/loader/GraphLoader.h
index 89c5bcad7..fe066ecf8 100644
--- a/compiler/luci-interpreter/src/loader/GraphLoader.h
+++ b/compiler/luci-interpreter/src/loader/GraphLoader.h
@@ -19,6 +19,7 @@
#include "core/RuntimeGraph.h"
#include "loader/RuntimeToIR.h"
+#include "luci_interpreter/MemoryManager.h"
#include <loco/IR/Graph.h>
@@ -32,7 +33,8 @@ class GraphLoader
public:
GraphLoader(const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
- std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor);
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
+ IMemoryManager *memory_manager);
void loadTensors();
void initInputOutputTensors() const;
@@ -42,6 +44,7 @@ private:
const loco::Graph *_graph;
RuntimeGraph *_runtime_graph;
RuntimeToIR &_runtime_to_ir;
+ IMemoryManager *_memory_manager;
const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph;
std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.cpp b/compiler/luci-interpreter/src/loader/KernelBuilder.cpp
index 4cb8bd691..8483a9a3d 100644
--- a/compiler/luci-interpreter/src/loader/KernelBuilder.cpp
+++ b/compiler/luci-interpreter/src/loader/KernelBuilder.cpp
@@ -15,1240 +15,90 @@
*/
#include "loader/KernelBuilder.h"
-
-#include "kernels/Add.h"
-#include "kernels/ArgMax.h"
-#include "kernels/AveragePool2D.h"
-#include "kernels/BatchToSpaceND.h"
-#include "kernels/Cast.h"
-#include "kernels/Concatenation.h"
-#include "kernels/Conv2D.h"
-#include "kernels/DepthToSpace.h"
-#include "kernels/DepthwiseConv2D.h"
-#include "kernels/Div.h"
-#include "kernels/Elu.h"
-#include "kernels/Exp.h"
-#include "kernels/Floor.h"
-#include "kernels/FloorDiv.h"
-#include "kernels/Equal.h"
-#include "kernels/FullyConnected.h"
-#include "kernels/Greater.h"
-#include "kernels/GreaterEqual.h"
-#include "kernels/If.h"
-#include "kernels/InstanceNorm.h"
-#include "kernels/L2Normalize.h"
-#include "kernels/L2Pool2D.h"
-#include "kernels/LeakyRelu.h"
-#include "kernels/Less.h"
-#include "kernels/LessEqual.h"
-#include "kernels/LocalResponseNormalization.h"
-#include "kernels/LogicalAnd.h"
-#include "kernels/LogicalNot.h"
-#include "kernels/LogicalOr.h"
-#include "kernels/Logistic.h"
-#include "kernels/LogSoftmax.h"
-#include "kernels/Maximum.h"
-#include "kernels/MaxPool2D.h"
-#include "kernels/Mean.h"
-#include "kernels/Minimum.h"
-#include "kernels/MirrorPad.h"
-#include "kernels/Mul.h"
-#include "kernels/Neg.h"
-#include "kernels/NotEqual.h"
-#include "kernels/Pack.h"
-#include "kernels/Pad.h"
-#include "kernels/PadV2.h"
-#include "kernels/Pow.h"
-#include "kernels/PRelu.h"
-#include "kernels/Relu.h"
-#include "kernels/Relu6.h"
-#include "kernels/Reshape.h"
-#include "kernels/ResizeBilinear.h"
-#include "kernels/ResizeNearestNeighbor.h"
-#include "kernels/ReverseV2.h"
-#include "kernels/Rsqrt.h"
-#include "kernels/Slice.h"
-#include "kernels/Softmax.h"
-#include "kernels/SpaceToBatchND.h"
-#include "kernels/SpaceToDepth.h"
-#include "kernels/Split.h"
-#include "kernels/StridedSlice.h"
-#include "kernels/Sqrt.h"
-#include "kernels/Square.h"
-#include "kernels/SquaredDifference.h"
-#include "kernels/Squeeze.h"
-#include "kernels/Sub.h"
-#include "kernels/Tanh.h"
-#include "kernels/Unpack.h"
-#include "kernels/Transpose.h"
-#include "kernels/TransposeConv.h"
-#include "kernels/While.h"
+#include "loader/nodes/Builders.h"
#include <stdexcept>
-namespace
-{
-
-template <typename CircleNodeOut>
-std::vector<const loco::Node *> collectOutputNodes(const luci::CircleNode *node)
-{
- std::vector<const CircleNodeOut *> output_nodes;
- for (const loco::Node *loco_node : loco::succs(node))
- {
- output_nodes.push_back(loco::must_cast<const CircleNodeOut *>(loco_node));
- }
- std::sort(output_nodes.begin(), output_nodes.end(),
- [](const CircleNodeOut *node1, const CircleNodeOut *node2) {
- return node1->index() < node2->index();
- });
- return {output_nodes.cbegin(), output_nodes.cend()};
-}
-
-} // namespace
-
namespace luci_interpreter
{
-// TODO move to anonymous namespace
-enum class KB
+#define CIRCLE_NODE(OPCODE, CLASS) CLASS,
+#define CIRCLE_VNODE(OPCODE, CLASS) CLASS,
+
+// This enum is auxiliary.
+// It is a duplicate of luci::CircleOpcode, but initialized with CLASS instead of OPCODE,
+// because the list of target operators is given in the form of CLASS names.
+enum class BuilderId
{
- ABC,
- DEF,
- GHIJ,
- KLMN,
- OPQR,
- STUV,
- WXYZ,
+#include <luci/IR/CircleNodes.lst>
+ Size // equal to the number of values in the BuilderId enum
};
-#define DECLARE_VISIT(CLASS) std::unique_ptr<Kernel> visit(const luci::CLASS *) override
+#undef CIRCLE_VNODE
+#undef CIRCLE_NODE
-template <KB kb> class KernelBuilderLet;
+/**
+ * @brief Registry of kernel builders
+ *
+ * This class contains a mapping from opcodes to kernel builder functions.
+ */
-template <>
-class KernelBuilderLet<KB::ABC> : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>,
- public KernelBuilderHelper
+class KernelBuilderRegistry
{
public:
- KernelBuilderLet(
- const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
- const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
- : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor)
- {
- }
+ using KernelBuilderFunc = std::unique_ptr<Kernel>(const luci::CircleNode *,
+ KernelBuilderHelper &);
-public:
- std::unique_ptr<Kernel> visit(const luci::CircleNode *) { return nullptr; }
-
-public:
- DECLARE_VISIT(CircleAdd);
- DECLARE_VISIT(CircleArgMax);
- DECLARE_VISIT(CircleAveragePool2D);
- DECLARE_VISIT(CircleBatchToSpaceND);
- DECLARE_VISIT(CircleCast);
- DECLARE_VISIT(CircleConcatenation);
- DECLARE_VISIT(CircleConst);
- DECLARE_VISIT(CircleConv2D);
-};
-
-template <>
-class KernelBuilderLet<KB::DEF> : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>,
- public KernelBuilderHelper
-{
-public:
- KernelBuilderLet(
- const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
- const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
- : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor)
+ KernelBuilderRegistry() : _operator_builders(size_t(BuilderId::Size), nullptr)
{
- }
+#define REGISTER_KERNEL(name) \
+ register_kernel_builder(BuilderId::Circle##name, build_kernel_Circle##name);
-public:
- std::unique_ptr<Kernel> visit(const luci::CircleNode *) { return nullptr; }
-
-public:
- DECLARE_VISIT(CircleDepthToSpace);
- DECLARE_VISIT(CircleDepthwiseConv2D);
- DECLARE_VISIT(CircleDiv);
- DECLARE_VISIT(CircleElu);
- DECLARE_VISIT(CircleEqual);
- DECLARE_VISIT(CircleExp);
- DECLARE_VISIT(CircleFloor);
- DECLARE_VISIT(CircleFloorDiv);
- DECLARE_VISIT(CircleFullyConnected);
-};
+#include "KernelsToBuild.lst"
-template <>
-class KernelBuilderLet<KB::GHIJ> : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>,
- public KernelBuilderHelper
-{
-public:
- KernelBuilderLet(
- const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
- const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
- : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor)
- {
+#undef REGISTER_KERNEL
}
-public:
- std::unique_ptr<Kernel> visit(const luci::CircleNode *) { return nullptr; }
-
-public:
- DECLARE_VISIT(CircleGreater);
- DECLARE_VISIT(CircleGreaterEqual);
- DECLARE_VISIT(CircleIf);
- DECLARE_VISIT(CircleInput);
- DECLARE_VISIT(CircleInstanceNorm);
-};
-
-template <>
-class KernelBuilderLet<KB::KLMN> : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>,
- public KernelBuilderHelper
-{
-public:
- KernelBuilderLet(
- const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
- const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
- : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor)
+ KernelBuilderFunc *get_kernel_builder_func(luci::CircleOpcode opcode) const
{
+ return _operator_builders.at(size_t(opcode));
}
-public:
- std::unique_ptr<Kernel> visit(const luci::CircleNode *) { return nullptr; }
-
-public:
- DECLARE_VISIT(CircleL2Normalize);
- DECLARE_VISIT(CircleL2Pool2D);
- DECLARE_VISIT(CircleLeakyRelu);
- DECLARE_VISIT(CircleLess);
- DECLARE_VISIT(CircleLessEqual);
- DECLARE_VISIT(CircleLocalResponseNormalization);
- DECLARE_VISIT(CircleLogSoftmax);
- DECLARE_VISIT(CircleLogicalAnd);
- DECLARE_VISIT(CircleLogicalNot);
- DECLARE_VISIT(CircleLogicalOr);
- DECLARE_VISIT(CircleLogistic);
- DECLARE_VISIT(CircleMaxPool2D);
- DECLARE_VISIT(CircleMaximum);
- DECLARE_VISIT(CircleMean);
- DECLARE_VISIT(CircleMinimum);
- DECLARE_VISIT(CircleMirrorPad);
- DECLARE_VISIT(CircleMul);
- DECLARE_VISIT(CircleNeg);
- DECLARE_VISIT(CircleNotEqual);
-};
+private:
+ std::vector<KernelBuilderFunc *> _operator_builders;
-template <>
-class KernelBuilderLet<KB::OPQR> : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>,
- public KernelBuilderHelper
-{
-public:
- KernelBuilderLet(
- const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
- const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
- : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor)
+ void register_kernel_builder(BuilderId id, KernelBuilderFunc *func)
{
+ // BuilderId is a duplicate of luci::CircleOpcode, so
+ // size_t(id) is equal to size_t(the corresponding operation's opcode).
+ assert(size_t(id) < _operator_builders.size());
+ _operator_builders[size_t(id)] = func;
}
-
-public:
- std::unique_ptr<Kernel> visit(const luci::CircleNode *) { return nullptr; }
-
-public:
- DECLARE_VISIT(CircleOutput);
- DECLARE_VISIT(CirclePRelu);
- DECLARE_VISIT(CirclePack);
- DECLARE_VISIT(CirclePad);
- DECLARE_VISIT(CirclePadV2);
- DECLARE_VISIT(CirclePow);
- DECLARE_VISIT(CircleRelu);
- DECLARE_VISIT(CircleRelu6);
- DECLARE_VISIT(CircleReshape);
- DECLARE_VISIT(CircleResizeBilinear);
- DECLARE_VISIT(CircleResizeNearestNeighbor);
- DECLARE_VISIT(CircleReverseV2);
- DECLARE_VISIT(CircleRsqrt);
};
-template <>
-class KernelBuilderLet<KB::STUV> : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>,
- public KernelBuilderHelper
+KernelBuilder::KernelBuilder(
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+ : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor)
{
-public:
- KernelBuilderLet(
- const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
- const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
- : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor)
- {
- }
-
-public:
- std::unique_ptr<Kernel> visit(const luci::CircleNode *) { return nullptr; }
-
-public:
- DECLARE_VISIT(CircleSlice);
- DECLARE_VISIT(CircleSoftmax);
- DECLARE_VISIT(CircleSpaceToBatchND);
- DECLARE_VISIT(CircleSpaceToDepth);
- DECLARE_VISIT(CircleSplit);
- DECLARE_VISIT(CircleSqrt);
- DECLARE_VISIT(CircleSquare);
- DECLARE_VISIT(CircleSquaredDifference);
- DECLARE_VISIT(CircleSqueeze);
- DECLARE_VISIT(CircleStridedSlice);
- DECLARE_VISIT(CircleSub);
- DECLARE_VISIT(CircleTanh);
- DECLARE_VISIT(CircleTranspose);
- DECLARE_VISIT(CircleTransposeConv);
- DECLARE_VISIT(CircleUnpack);
-};
+ _builder_registry = std::make_unique<KernelBuilderRegistry>();
+}
-template <>
-class KernelBuilderLet<KB::WXYZ> : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>,
- public KernelBuilderHelper
+KernelBuilder::~KernelBuilder()
{
-public:
- KernelBuilderLet(
- const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
- const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
- : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor)
- {
- }
-
-public:
- std::unique_ptr<Kernel> visit(const luci::CircleNode *) { return nullptr; }
-
-public:
- DECLARE_VISIT(CircleWhile);
-};
-
-#undef DECLARE_VISIT
+ // Defined in this .cpp file to hide the KernelBuilderRegistry internals:
+ // this destructor deletes _builder_registry.
+}
std::unique_ptr<Kernel> KernelBuilder::build(const luci::CircleNode *node)
{
-#define VISIT_KB(GRP) \
- do \
- { \
- KernelBuilderLet<KB::GRP> kbl(graph_to_runtime_graph(), node_to_tensor()); \
- auto ret = node->accept(&kbl); \
- if (ret != nullptr) \
- return ret; \
- } while (false)
-
- VISIT_KB(ABC);
- VISIT_KB(DEF);
- VISIT_KB(GHIJ);
- VISIT_KB(KLMN);
- VISIT_KB(OPQR);
- VISIT_KB(STUV);
- VISIT_KB(WXYZ);
+ auto specific_builder = _builder_registry->get_kernel_builder_func(node->opcode());
+ if (specific_builder != nullptr)
+ return specific_builder(node, *this);
-#undef VISIT_KB
std::string msg = "Unsupported operator: ";
msg += std::to_string(static_cast<uint32_t>(node->opcode())) + " " + std::string(node->name());
throw std::invalid_argument(msg.c_str());
}
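
Each name listed in KernelsToBuild.lst is expected to provide a build_kernel_Circle<Name> function declared in loader/nodes/Builders.h. A hedged sketch of what one such builder might look like, assuming the KernelBuilderFunc signature above and that KernelBuilderHelper exposes the getInputTensor/getOutputTensor helpers used by the visitor code removed below; the Tanh case is chosen only for illustration:

std::unique_ptr<Kernel> build_kernel_CircleTanh(const luci::CircleNode *circle_node,
                                                KernelBuilderHelper &helper)
{
  const auto *node = loco::must_cast<const luci::CircleTanh *>(circle_node);
  assert(node->arity() == 1);

  const Tensor *input = helper.getInputTensor(node->x());
  Tensor *output = helper.getOutputTensor(node);

  return std::make_unique<kernels::Tanh>(input, output);
}
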
-std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleAdd *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input1 = getInputTensor(node->x());
- const Tensor *input2 = getInputTensor(node->y());
- Tensor *output = getOutputTensor(node);
-
- AddParams params{};
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::Add>(input1, input2, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleArgMax *node)
-{
- assert(node->arity() == 2);
- const Tensor *input = getInputTensor(node->input());
- const Tensor *axis = getInputTensor(node->dimension());
- Tensor *output = getOutputTensor(node);
-
- ArgMaxParams params{};
- params.output_type = node->output_type();
-
- return std::make_unique<kernels::ArgMax>(input, axis, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleAveragePool2D *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->value());
- Tensor *output = getOutputTensor(node);
-
- Pool2DParams params{};
- params.padding = node->padding();
- params.filter_height = node->filter()->h();
- params.filter_width = node->filter()->w();
- params.stride_height = node->stride()->h();
- params.stride_width = node->stride()->w();
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::AveragePool2D>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleBatchToSpaceND *node)
-{
- assert(node->arity() == 3);
-
- const Tensor *input = getInputTensor(node->input());
- const Tensor *block_shape = getInputTensor(node->block_shape());
- const Tensor *crops = getInputTensor(node->crops());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::BatchToSpaceND>(input, block_shape, crops, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleCast *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->x());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Cast>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleConcatenation *node)
-{
- std::vector<const Tensor *> inputs(node->numValues());
- for (uint32_t i = 0; i < node->numValues(); ++i)
- {
- inputs[i] = getInputTensor(node->values(i));
- }
- Tensor *output = getOutputTensor(node);
-
- ConcatenationParams params{};
- params.axis = node->axis();
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::Concatenation>(std::move(inputs), output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleConst *)
-{
- throw std::runtime_error("Const node cannot be executed.");
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleConv2D *node)
-{
- assert(node->arity() == 3);
-
- const Tensor *input = getInputTensor(node->input());
- const Tensor *filter = getInputTensor(node->filter());
- const Tensor *bias = getInputTensor(node->bias());
- Tensor *output = getOutputTensor(node);
-
- Conv2DParams params{};
- params.padding = node->padding();
- params.stride_height = node->stride()->h();
- params.stride_width = node->stride()->w();
- params.dilation_height_factor = node->dilation()->h();
- params.dilation_width_factor = node->dilation()->w();
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::Conv2D>(input, filter, bias, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleDepthToSpace *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->input());
- Tensor *output = getOutputTensor(node);
-
- DepthToSpaceParams params{};
- params.block_size = node->block_size();
-
- return std::make_unique<kernels::DepthToSpace>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleDepthwiseConv2D *node)
-{
- assert(node->arity() == 3);
-
- const Tensor *input = getInputTensor(node->input());
- const Tensor *filter = getInputTensor(node->filter());
- const Tensor *bias = getInputTensor(node->bias());
- Tensor *output = getOutputTensor(node);
-
- DepthwiseConv2DParams params{};
- params.padding = node->padding();
- params.depth_multiplier = node->depthMultiplier();
- params.stride_height = node->stride()->h();
- params.stride_width = node->stride()->w();
- params.dilation_height_factor = node->dilation()->h();
- params.dilation_width_factor = node->dilation()->w();
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleDiv *node)
-{
- assert(node->arity() == 2);
- const Tensor *input1 = getInputTensor(node->x());
- const Tensor *input2 = getInputTensor(node->y());
- Tensor *output = getOutputTensor(node);
-
- DivParams params{};
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::Div>(input1, input2, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleElu *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->features());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Elu>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleEqual *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *x = getInputTensor(node->x());
- const Tensor *y = getInputTensor(node->y());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Equal>(x, y, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleExp *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->x());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Exp>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleFloor *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->x());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Floor>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleFloorDiv *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *x = getInputTensor(node->x());
- const Tensor *y = getInputTensor(node->y());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::FloorDiv>(x, y, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleFullyConnected *node)
-{
- assert(node->arity() == 3);
-
- const Tensor *input = getInputTensor(node->input());
- const Tensor *weights = getInputTensor(node->weights());
- const Tensor *bias = getOptionalInputTensor(node->bias());
- Tensor *output = getOutputTensor(node);
-
- FullyConnectedParams params{};
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::FullyConnected>(input, weights, bias, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::GHIJ>::visit(const luci::CircleGreater *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *x = getInputTensor(node->x());
- const Tensor *y = getInputTensor(node->y());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Greater>(x, y, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::GHIJ>::visit(const luci::CircleGreaterEqual *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *x = getInputTensor(node->x());
- const Tensor *y = getInputTensor(node->y());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::GreaterEqual>(x, y, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::GHIJ>::visit(const luci::CircleIf *node)
-{
- auto output_nodes = collectOutputNodes<luci::CircleIfOut>(node);
- assert(node->arity() == 1 + node->input_count());
- assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
-
- const Tensor *cond = getInputTensor(node->cond());
- std::vector<const Tensor *> inputs(node->input_count());
- for (uint32_t i = 0; i < node->input_count(); ++i)
- {
- inputs[i] = getInputTensor(node->input(i));
- }
- std::vector<Tensor *> outputs = getOutputTensors(output_nodes);
-
- RuntimeGraph *then_graph = getRuntimeGraph(node->then_graph());
- RuntimeGraph *else_graph = getRuntimeGraph(node->else_graph());
-
- return std::make_unique<kernels::If>(cond, std::move(inputs), std::move(outputs), then_graph,
- else_graph);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::GHIJ>::visit(const luci::CircleInstanceNorm *node)
-{
- assert(node->arity() == 3);
-
- const Tensor *input = getInputTensor(node->input());
- const Tensor *gamma = getInputTensor(node->gamma());
- const Tensor *beta = getInputTensor(node->beta());
-
- Tensor *output = getOutputTensor(node);
-
- InstanceNormParams params{};
- params.epsilon = node->epsilon();
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::InstanceNorm>(input, gamma, beta, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::GHIJ>::visit(const luci::CircleInput *)
-{
- throw std::runtime_error("Input node cannot be executed.");
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleL2Normalize *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->x());
- Tensor *output = getOutputTensor(node);
-
- L2NormParams params{};
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::L2Normalize>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleL2Pool2D *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->value());
- Tensor *output = getOutputTensor(node);
-
- Pool2DParams params{};
- params.padding = node->padding();
- params.filter_height = node->filter()->h();
- params.filter_width = node->filter()->w();
- params.stride_height = node->stride()->h();
- params.stride_width = node->stride()->w();
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::L2Pool2D>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLeakyRelu *node)
-{
- assert(node->arity() == 1);
- const Tensor *input = getInputTensor(node->features());
- Tensor *output = getOutputTensor(node);
-
- LeakyReluParams params{};
- params.alpha = node->alpha();
-
- return std::make_unique<kernels::LeakyRelu>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLess *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *x = getInputTensor(node->x());
- const Tensor *y = getInputTensor(node->y());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Less>(x, y, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLessEqual *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *x = getInputTensor(node->x());
- const Tensor *y = getInputTensor(node->y());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::LessEqual>(x, y, output);
-}
-
-std::unique_ptr<Kernel>
-KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLocalResponseNormalization *node)
-{
- assert(node->arity() == 1);
- const Tensor *input = getInputTensor(node->input());
- Tensor *output = getOutputTensor(node);
-
- LocalResponseNormalizationParams params{};
- params.radius = node->radius();
- params.bias = node->bias();
- params.alpha = node->alpha();
- params.beta = node->beta();
-
- return std::make_unique<kernels::LocalResponseNormalization>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLogicalAnd *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input1 = getInputTensor(node->x());
- const Tensor *input2 = getInputTensor(node->y());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::LogicalAnd>(input1, input2, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLogicalNot *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->x());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::LogicalNot>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLogicalOr *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input1 = getInputTensor(node->x());
- const Tensor *input2 = getInputTensor(node->y());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::LogicalOr>(input1, input2, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLogistic *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->x());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Logistic>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLogSoftmax *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->logits());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::LogSoftmax>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleMaximum *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input1 = getInputTensor(node->x());
- const Tensor *input2 = getInputTensor(node->y());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Maximum>(input1, input2, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleMaxPool2D *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->value());
- Tensor *output = getOutputTensor(node);
-
- Pool2DParams params{};
- params.padding = node->padding();
- params.filter_height = node->filter()->h();
- params.filter_width = node->filter()->w();
- params.stride_height = node->stride()->h();
- params.stride_width = node->stride()->w();
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::MaxPool2D>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleMean *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input = getInputTensor(node->input());
- const Tensor *axes = getInputTensor(node->reduction_indices());
- Tensor *output = getOutputTensor(node);
-
- ReducerParams params{};
- params.keep_dims = node->keep_dims();
-
- return std::make_unique<kernels::Mean>(input, axes, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleMinimum *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input1 = getInputTensor(node->x());
- const Tensor *input2 = getInputTensor(node->y());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Minimum>(input1, input2, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleMirrorPad *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input = getInputTensor(node->input());
- const Tensor *paddings = getInputTensor(node->paddings());
- Tensor *output = getOutputTensor(node);
-
- MirrorPadParams params{};
- params.mode = node->mode();
-
- return std::make_unique<kernels::MirrorPad>(input, paddings, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleMul *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input1 = getInputTensor(node->x());
- const Tensor *input2 = getInputTensor(node->y());
- Tensor *output = getOutputTensor(node);
-
- MulParams params{};
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::Mul>(input1, input2, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleNeg *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->x());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Neg>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleNotEqual *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *x = getInputTensor(node->x());
- const Tensor *y = getInputTensor(node->y());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::NotEqual>(x, y, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CircleOutput *)
-{
- throw std::runtime_error("Output node cannot be executed.");
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CirclePack *node)
-{
- assert(node->arity() == node->values_count());
-
- std::vector<const Tensor *> inputs(node->values_count());
- for (uint32_t i = 0; i < node->values_count(); ++i)
- {
- inputs[i] = getInputTensor(node->values(i));
- }
- Tensor *output = getOutputTensor(node);
-
- PackParams params{};
- params.axis = node->axis();
- params.values_count = node->values_count();
-
- return std::make_unique<kernels::Pack>(std::move(inputs), output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CirclePad *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input = getInputTensor(node->input());
- const Tensor *paddings = getInputTensor(node->paddings());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Pad>(input, paddings, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CirclePadV2 *node)
-{
- assert(node->arity() == 3);
-
- const Tensor *input = getInputTensor(node->input());
- const Tensor *paddings = getInputTensor(node->paddings());
- const Tensor *constant_values = getInputTensor(node->constant_values());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::PadV2>(input, paddings, constant_values, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CirclePow *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input1 = getInputTensor(node->x());
- const Tensor *input2 = getInputTensor(node->y());
-
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Pow>(input1, input2, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CirclePRelu *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input = getInputTensor(node->input());
- const Tensor *alpha = getInputTensor(node->alpha());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::PRelu>(input, alpha, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CircleRelu *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->features());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Relu>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CircleRelu6 *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->features());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Relu6>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CircleReshape *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input = getInputTensor(node->tensor());
- const Tensor *shape = getInputTensor(node->shape());
- Tensor *output = getOutputTensor(node);
-
- // NOTE 'newShape' attribute is ignored.
- return std::make_unique<kernels::Reshape>(input, shape, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CircleResizeBilinear *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input = getInputTensor(node->input());
- const Tensor *size = getInputTensor(node->size());
- Tensor *output = getOutputTensor(node);
-
- ResizeBilinearParams params{};
- params.align_corners = node->align_corners();
- params.half_pixel_centers = node->half_pixel_centers();
-
- return std::make_unique<kernels::ResizeBilinear>(input, size, output, params);
-}
-
-std::unique_ptr<Kernel>
-KernelBuilderLet<KB::OPQR>::visit(const luci::CircleResizeNearestNeighbor *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input = getInputTensor(node->input());
- const Tensor *size = getInputTensor(node->size());
- Tensor *output = getOutputTensor(node);
-
- ResizeNearestNeighborParams params{};
- params.align_corners = node->align_corners();
- // TODO Update half_pixel_centers once CircleResizeNearestNeighbor supports it.
- // The current CircleResizeNearestNeighbor IR node has no half_pixel_centers attribute,
- // so it defaults to false here.
- params.half_pixel_centers = false;
-
- return std::make_unique<kernels::ResizeNearestNeighbor>(input, size, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CircleReverseV2 *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input = getInputTensor(node->tensor());
- const Tensor *axes = getInputTensor(node->axis());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::ReverseV2>(input, axes, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CircleRsqrt *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->x());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Rsqrt>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSlice *node)
-{
- assert(node->arity() == 3);
-
- const Tensor *input = getInputTensor(node->input());
- const Tensor *begin = getInputTensor(node->begin());
- const Tensor *size = getInputTensor(node->size());
-
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Slice>(input, begin, size, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSoftmax *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->logits());
- Tensor *output = getOutputTensor(node);
-
- SoftmaxParams params{};
- params.beta = node->beta();
-
- return std::make_unique<kernels::Softmax>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSpaceToBatchND *node)
-{
- assert(node->arity() == 3);
-
- const Tensor *input = getInputTensor(node->input());
- const Tensor *block_shape = getInputTensor(node->block_shape());
- const Tensor *paddings = getInputTensor(node->paddings());
-
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::SpaceToBatchND>(input, block_shape, paddings, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSpaceToDepth *node)
-{
- assert(node->arity() == 1);
- const Tensor *input = getInputTensor(node->input());
-
- Tensor *output = getOutputTensor(node);
-
- SpaceToDepthParams params{};
- params.block_size = node->block_size();
-
- return std::make_unique<kernels::SpaceToDepth>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSplit *node)
-{
- auto output_nodes = collectOutputNodes<luci::CircleSplitOut>(node);
- assert(node->arity() == 2);
- assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
-
- const Tensor *axis = getInputTensor(node->split_dim());
- const Tensor *input = getInputTensor(node->input());
- std::vector<Tensor *> outputs = getOutputTensors(output_nodes);
-
- // NOTE 'num_splits' attribute is ignored.
- return std::make_unique<kernels::Split>(axis, input, std::move(outputs));
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSqrt *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->x());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Sqrt>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSquare *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->x());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Square>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSquaredDifference *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input1 = getInputTensor(node->x());
- const Tensor *input2 = getInputTensor(node->y());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::SquaredDifference>(input1, input2, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSqueeze *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->input());
- Tensor *output = getOutputTensor(node);
-
- SqueezeParams params{};
- params.squeeze_dims = node->squeeze_dims();
-
- return std::make_unique<kernels::Squeeze>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleStridedSlice *node)
-{
- assert(node->arity() == 4);
-
- const Tensor *input = getInputTensor(node->input());
- const Tensor *begin = getInputTensor(node->begin());
- const Tensor *end = getInputTensor(node->end());
- const Tensor *strides = getInputTensor(node->strides());
-
- Tensor *output = getOutputTensor(node);
-
- StridedSliceParams params{};
- params.begin_mask = node->begin_mask();
- params.ellipsis_mask = node->ellipsis_mask();
- params.end_mask = node->end_mask();
- params.new_axis_mask = node->new_axis_mask();
- params.shrink_axis_mask = node->shrink_axis_mask();
-
- return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSub *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input1 = getInputTensor(node->x());
- const Tensor *input2 = getInputTensor(node->y());
- Tensor *output = getOutputTensor(node);
-
- SubParams params{};
- params.activation = node->fusedActivationFunction();
-
- return std::make_unique<kernels::Sub>(input1, input2, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleTanh *node)
-{
- assert(node->arity() == 1);
-
- const Tensor *input = getInputTensor(node->x());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Tanh>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleTranspose *node)
-{
- assert(node->arity() == 2);
-
- const Tensor *input = getInputTensor(node->a());
- const Tensor *perm = getInputTensor(node->perm());
- Tensor *output = getOutputTensor(node);
-
- return std::make_unique<kernels::Transpose>(input, perm, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleTransposeConv *node)
-{
- assert(node->arity() == 4);
-
- const Tensor *input_sizes = getInputTensor(node->inputSizes());
- const Tensor *filter = getInputTensor(node->filter());
- const Tensor *out_backprop = getInputTensor(node->outBackprop());
- const Tensor *bias = getOptionalInputTensor(node->bias());
-
- Tensor *output = getOutputTensor(node);
-
- TransposeConvParams params{};
- params.padding = node->padding();
- params.stride_height = node->stride()->h();
- params.stride_width = node->stride()->w();
-
- return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, bias, output,
- params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleUnpack *node)
-{
- auto output_nodes = collectOutputNodes<luci::CircleUnpackOut>(node);
- assert(node->arity() == 1);
- assert(output_nodes.size() == static_cast<size_t>(node->num()));
-
- const Tensor *input = getInputTensor(node->value());
- std::vector<Tensor *> outputs = getOutputTensors(output_nodes);
-
- UnpackParams params{};
- params.axis = node->axis();
-
- // NOTE 'num' attribute is ignored.
- return std::make_unique<kernels::Unpack>(input, std::move(outputs), params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::WXYZ>::visit(const luci::CircleWhile *node)
-{
- auto output_nodes = collectOutputNodes<luci::CircleWhileOut>(node);
- assert(node->arity() == node->input_count());
- assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
-
- std::vector<const Tensor *> inputs(node->input_count());
- for (uint32_t i = 0; i < node->input_count(); ++i)
- {
- inputs[i] = getInputTensor(node->input(i));
- }
- std::vector<Tensor *> outputs = getOutputTensors(output_nodes);
-
- RuntimeGraph *cond_graph = getRuntimeGraph(node->cond_graph());
- RuntimeGraph *body_graph = getRuntimeGraph(node->body_graph());
-
- return std::make_unique<kernels::While>(std::move(inputs), std::move(outputs), cond_graph,
- body_graph);
-}
-
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.h b/compiler/luci-interpreter/src/loader/KernelBuilder.h
index 406c41ef6..b1f383394 100644
--- a/compiler/luci-interpreter/src/loader/KernelBuilder.h
+++ b/compiler/luci-interpreter/src/loader/KernelBuilder.h
@@ -30,17 +30,21 @@
namespace luci_interpreter
{
+class KernelBuilderRegistry;
+
class KernelBuilder : public KernelBuilderHelper
{
public:
KernelBuilder(
const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
- const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
- : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor)
- {
- }
+ const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor);
+
+ ~KernelBuilder();
std::unique_ptr<Kernel> build(const luci::CircleNode *node);
+
+private:
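+ // Registry of kernel builder functions (see the forward declaration above).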
+ std::unique_ptr<KernelBuilderRegistry> _builder_registry;
};
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp b/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
index d8611243e..7a457a62f 100644
--- a/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
+++ b/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
@@ -16,6 +16,7 @@
#include "loader/GraphLoader.h"
#include "loader/KernelBuilder.h"
+#include "luci_interpreter/SimpleMemoryManager.h"
#include <kernels/Add.h>
#include <kernels/ArgMax.h>
@@ -68,6 +69,7 @@
#include <kernels/Softmax.h>
#include <kernels/SpaceToDepth.h>
#include <kernels/Split.h>
+#include <kernels/SplitV.h>
#include <kernels/Sqrt.h>
#include <kernels/SquaredDifference.h>
#include <kernels/Squeeze.h>
@@ -91,6 +93,9 @@ class KernelBuilderTest : public Test
{
protected:
luci::CircleInput *createInputNode() { return createNode<luci::CircleInput>(); }
+ void SetUp() override { _memory_manager = std::make_unique<SimpleMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
template <typename NodeT, typename... Args> NodeT *createNode(Args &&... args)
{
@@ -114,10 +119,11 @@ protected:
{
std::unordered_map<const loco::Graph *, RuntimeGraph *> graph_to_runtime_graph;
- RuntimeGraph runtime_graph(nullptr);
+ RuntimeGraph runtime_graph(nullptr, _memory_manager.get());
+ graph_to_runtime_graph[&_graph] = &runtime_graph;
RuntimeToIR runtime_to_ir;
GraphLoader graph_loader(&_graph, &runtime_graph, runtime_to_ir, graph_to_runtime_graph,
- _node_to_tensor);
+ _node_to_tensor, _memory_manager.get());
graph_loader.loadTensors();
KernelBuilder kernel_builder(graph_to_runtime_graph, _node_to_tensor);
@@ -1091,6 +1097,31 @@ TEST_F(KernelBuilderTest, Split)
checkTensor(kernel->output(1), output2);
}
+TEST_F(KernelBuilderTest, SplitV)
+{
+ auto *input = createInputNode();
+ auto *size_splits = createInputNode();
+ auto *axis = createInputNode();
+ auto *op = createNode<luci::CircleSplitV>();
+ auto *output0 = createNodeOut<luci::CircleSplitVOut>(op, 0);
+ auto *output1 = createNodeOut<luci::CircleSplitVOut>(op, 1);
+
+ op->input(input);
+ op->size_splits(size_splits);
+ op->split_dim(axis);
+
+ op->num_split(2);
+
+ auto kernel = buildKernel<kernels::SplitV>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->size_splits(), size_splits);
+ checkTensor(kernel->axis(), axis);
+ checkTensor(kernel->output(0), output0);
+ checkTensor(kernel->output(1), output1);
+}
+
TEST_F(KernelBuilderTest, Sqrt)
{
auto *input = createInputNode();
diff --git a/compiler/luci-interpreter/src/loader/KernelBuilderHelper.h b/compiler/luci-interpreter/src/loader/KernelBuilderHelper.h
index 4517d1f19..d6fb253b1 100644
--- a/compiler/luci-interpreter/src/loader/KernelBuilderHelper.h
+++ b/compiler/luci-interpreter/src/loader/KernelBuilderHelper.h
@@ -39,7 +39,7 @@ public:
{
}
-protected:
+public:
const Tensor *getInputTensor(const loco::Node *node) const;
const Tensor *getOptionalInputTensor(const loco::Node *node) const;
@@ -48,7 +48,7 @@ protected:
RuntimeGraph *getRuntimeGraph(const loco::Graph *graph) const;
-protected:
+public:
const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph() const
{
return _graph_to_runtime_graph;
@@ -64,6 +64,21 @@ private:
const std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
};
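+// Collects the typed output nodes of a multi-output operation and returns them sorted by output index.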
+template <typename CircleNodeOut>
+std::vector<const loco::Node *> collectOutputNodes(const loco::Node *node)
+{
+ std::vector<const CircleNodeOut *> output_nodes;
+ for (const loco::Node *loco_node : loco::succs(node))
+ {
+ output_nodes.push_back(loco::must_cast<const CircleNodeOut *>(loco_node));
+ }
+ std::sort(output_nodes.begin(), output_nodes.end(),
+ [](const CircleNodeOut *node1, const CircleNodeOut *node2) {
+ return node1->index() < node2->index();
+ });
+ return {output_nodes.cbegin(), output_nodes.cend()};
+}
+
} // namespace luci_interpreter
#endif // LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H
diff --git a/compiler/luci-interpreter/src/loader/ModuleLoader.cpp b/compiler/luci-interpreter/src/loader/ModuleLoader.cpp
index ff211bf09..2f278b087 100644
--- a/compiler/luci-interpreter/src/loader/ModuleLoader.cpp
+++ b/compiler/luci-interpreter/src/loader/ModuleLoader.cpp
@@ -23,9 +23,10 @@ namespace luci_interpreter
ModuleLoader::ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module,
RuntimeToIR &runtime_to_ir,
- std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
+ IMemoryManager *memory_manager)
: _module(module), _runtime_module(runtime_module), _runtime_to_ir(runtime_to_ir),
- _node_to_tensor(node_to_tensor)
+ _node_to_tensor(node_to_tensor), _memory_manager(memory_manager)
{
}
@@ -35,14 +36,14 @@ void ModuleLoader::load()
// process for control flow nodes.
for (size_t i = 0; i < _module->size(); ++i)
{
- _graph_to_runtime_graph.emplace(_module->graph(i), _runtime_module->addGraph());
+ _graph_to_runtime_graph.emplace(_module->graph(i), _runtime_module->addGraph(_memory_manager));
}
for (size_t i = 0; i < _module->size(); ++i)
{
const loco::Graph *graph = _module->graph(i);
RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph);
GraphLoader loader(graph, runtime_graph, _runtime_to_ir, _graph_to_runtime_graph,
- _node_to_tensor);
+ _node_to_tensor, _memory_manager);
loader.loadTensors();
loader.initInputOutputTensors();
loader.loadOperators();
diff --git a/compiler/luci-interpreter/src/loader/ModuleLoader.h b/compiler/luci-interpreter/src/loader/ModuleLoader.h
index 1af0ed747..11326a2ee 100644
--- a/compiler/luci-interpreter/src/loader/ModuleLoader.h
+++ b/compiler/luci-interpreter/src/loader/ModuleLoader.h
@@ -19,6 +19,7 @@
#include "core/RuntimeModule.h"
#include "loader/RuntimeToIR.h"
+#include "luci_interpreter/MemoryManager.h"
#include <luci/IR/Module.h>
@@ -32,11 +33,13 @@ class ModuleLoader
public:
ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module,
RuntimeToIR &runtime_to_ir,
- std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor);
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
+ IMemoryManager *memory_manager);
void load();
private:
+ IMemoryManager *_memory_manager;
const luci::Module *_module;
RuntimeModule *_runtime_module;
RuntimeToIR &_runtime_to_ir;
diff --git a/compiler/luci-interpreter/src/loader/nodes/Add.cpp b/compiler/luci-interpreter/src/loader/nodes/Add.cpp
new file mode 100644
index 000000000..decccaa1d
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Add.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Add.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleAdd(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
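+ // Guard against this builder being invoked for a different operation type.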
+ const auto *node = dynamic_cast<const luci::CircleAdd *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ AddParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Add>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp b/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp
new file mode 100644
index 000000000..0ee367748
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ArgMax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleArgMax(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleArgMax *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *axis = helper.getInputTensor(node->dimension());
+ Tensor *output = helper.getOutputTensor(node);
+
+ ArgMaxParams params{};
+ params.output_type = node->output_type();
+
+ return std::make_unique<kernels::ArgMax>(input, axis, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
new file mode 100644
index 000000000..5bc37bd4a
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/AveragePool2D.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleAveragePool2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleAveragePool2D *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->value());
+ Tensor *output = helper.getOutputTensor(node);
+
+ Pool2DParams params{};
+ params.padding = node->padding();
+ params.filter_height = node->filter()->h();
+ params.filter_width = node->filter()->w();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::AveragePool2D>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp b/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp
new file mode 100644
index 000000000..33d0e2db6
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/BatchToSpaceND.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleBatchToSpaceND(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleBatchToSpaceND *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *block_shape = helper.getInputTensor(node->block_shape());
+ const Tensor *crops = helper.getInputTensor(node->crops());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::BatchToSpaceND>(input, block_shape, crops, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Builders.h b/compiler/luci-interpreter/src/loader/nodes/Builders.h
new file mode 100644
index 000000000..eab284008
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Builders.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H
+#define LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H
+
+#include "loader/KernelBuilderHelper.h"
+
+#include "luci/IR/CircleNodes.h"
+
+namespace luci_interpreter
+{
+
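+// Declares one build_kernel_Circle##name function for each kernel listed in KernelsToBuild.lst.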
+#define REGISTER_KERNEL(name) \
+ std::unique_ptr<Kernel> build_kernel_Circle##name(const luci::CircleNode *circle_node, \
+ KernelBuilderHelper &helper);
+
+#include "KernelsToBuild.lst"
+
+#undef REGISTER_KERNEL
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H
diff --git a/compiler/luci-interpreter/src/loader/nodes/Cast.cpp b/compiler/luci-interpreter/src/loader/nodes/Cast.cpp
new file mode 100644
index 000000000..21ea5ceab
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Cast.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Cast.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleCast(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleCast *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Cast>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp b/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp
new file mode 100644
index 000000000..7823a9967
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Concatenation.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleConcatenation(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleConcatenation *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ std::vector<const Tensor *> inputs(node->numValues());
+ for (uint32_t i = 0; i < node->numValues(); ++i)
+ {
+ inputs[i] = helper.getInputTensor(node->values(i));
+ }
+ Tensor *output = helper.getOutputTensor(node);
+
+ ConcatenationParams params{};
+ params.axis = node->axis();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Concatenation>(std::move(inputs), output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
new file mode 100644
index 000000000..71c8ef3e4
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Conv2D.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleConv2D *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *filter = helper.getInputTensor(node->filter());
+ const Tensor *bias = helper.getInputTensor(node->bias());
+ Tensor *output = helper.getOutputTensor(node);
+
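+ // Scratch tensor used as the im2col buffer; it is not observable and is owned by the runtime graph.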
+ auto im2col =
+ std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, "");
+ im2col->set_observable(false);
+ im2col->set_data_buffer(nullptr);
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(im2col));
+
+ Conv2DParams params{};
+ params.padding = node->padding();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.dilation_height_factor = node->dilation()->h();
+ params.dilation_width_factor = node->dilation()->w();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Conv2D>(input, filter, bias, output, tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp b/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp
new file mode 100644
index 000000000..0310fb23f
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/DepthToSpace.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDepthToSpace(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleDepthToSpace *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ DepthToSpaceParams params{};
+ params.block_size = node->block_size();
+
+ return std::make_unique<kernels::DepthToSpace>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
new file mode 100644
index 000000000..c2f0346a2
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/DepthwiseConv2D.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDepthwiseConv2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleDepthwiseConv2D *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *filter = helper.getInputTensor(node->filter());
+ const Tensor *bias = helper.getInputTensor(node->bias());
+ Tensor *output = helper.getOutputTensor(node);
+
+ DepthwiseConv2DParams params{};
+ params.padding = node->padding();
+ params.depth_multiplier = node->depthMultiplier();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.dilation_height_factor = node->dilation()->h();
+ params.dilation_width_factor = node->dilation()->w();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Div.cpp b/compiler/luci-interpreter/src/loader/nodes/Div.cpp
new file mode 100644
index 000000000..56c2e98f2
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Div.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Div.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDiv(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleDiv *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ DivParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Div>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Elu.cpp b/compiler/luci-interpreter/src/loader/nodes/Elu.cpp
new file mode 100644
index 000000000..98ee78be7
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Elu.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Elu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleElu(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleElu *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->features());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Elu>(input, output);
+}
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Equal.cpp b/compiler/luci-interpreter/src/loader/nodes/Equal.cpp
new file mode 100644
index 000000000..649d9bfe9
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Equal.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Equal.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleEqual(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleEqual *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Equal>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Exp.cpp b/compiler/luci-interpreter/src/loader/nodes/Exp.cpp
new file mode 100644
index 000000000..411d142c3
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Exp.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Exp.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleExp(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleExp *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Exp>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Floor.cpp b/compiler/luci-interpreter/src/loader/nodes/Floor.cpp
new file mode 100644
index 000000000..6d8435f6c
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Floor.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Floor.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFloor(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleFloor *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Floor>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp b/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp
new file mode 100644
index 000000000..cae2e186e
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/FloorDiv.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFloorDiv(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleFloorDiv *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::FloorDiv>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp b/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp
new file mode 100644
index 000000000..2917598fc
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/FullyConnected.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFullyConnected(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleFullyConnected *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 3);
+
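+  // Unlike the other inputs, the bias is optional and is therefore fetched with
+  // getOptionalInputTensor rather than getInputTensor.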
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *weights = helper.getInputTensor(node->weights());
+ const Tensor *bias = helper.getOptionalInputTensor(node->bias());
+ Tensor *output = helper.getOutputTensor(node);
+
+ FullyConnectedParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::FullyConnected>(input, weights, bias, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Greater.cpp b/compiler/luci-interpreter/src/loader/nodes/Greater.cpp
new file mode 100644
index 000000000..3db11b840
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Greater.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Greater.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleGreater(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleGreater *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Greater>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp
new file mode 100644
index 000000000..dbe051d67
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/GreaterEqual.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleGreaterEqual(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleGreaterEqual *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::GreaterEqual>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/If.cpp b/compiler/luci-interpreter/src/loader/nodes/If.cpp
new file mode 100644
index 000000000..5983f4d3b
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/If.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/If.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleIf(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleIf *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ auto output_nodes = collectOutputNodes<luci::CircleIfOut>(node);
+ assert(node->arity() == 1 + node->input_count());
+ assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
+
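+  // CircleIf carries the condition plus node->input_count() data inputs; the outputs
+  // come from the associated CircleIfOut nodes, and each branch body is resolved to
+  // its own RuntimeGraph below.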
+ const Tensor *cond = helper.getInputTensor(node->cond());
+ std::vector<const Tensor *> inputs(node->input_count());
+ for (uint32_t i = 0; i < node->input_count(); ++i)
+ {
+ inputs[i] = helper.getInputTensor(node->input(i));
+ }
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ RuntimeGraph *then_graph = helper.getRuntimeGraph(node->then_graph());
+ RuntimeGraph *else_graph = helper.getRuntimeGraph(node->else_graph());
+
+ return std::make_unique<kernels::If>(cond, std::move(inputs), std::move(outputs), then_graph,
+ else_graph);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp b/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp
new file mode 100644
index 000000000..0a8fb85e2
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/InstanceNorm.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleInstanceNorm(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleInstanceNorm *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *gamma = helper.getInputTensor(node->gamma());
+ const Tensor *beta = helper.getInputTensor(node->beta());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ InstanceNormParams params{};
+ params.epsilon = node->epsilon();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::InstanceNorm>(input, gamma, beta, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp b/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp
new file mode 100644
index 000000000..05f920266
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/L2Normalize.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleL2Normalize(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleL2Normalize *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ L2NormParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::L2Normalize>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp
new file mode 100644
index 000000000..0e70afafa
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/L2Pool2D.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleL2Pool2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleL2Pool2D *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->value());
+ Tensor *output = helper.getOutputTensor(node);
+
+ Pool2DParams params{};
+ params.padding = node->padding();
+ params.filter_height = node->filter()->h();
+ params.filter_width = node->filter()->w();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::L2Pool2D>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp b/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp
new file mode 100644
index 000000000..7b229ad0e
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LeakyRelu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLeakyRelu(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleLeakyRelu *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 1);
+ const Tensor *input = helper.getInputTensor(node->features());
+ Tensor *output = helper.getOutputTensor(node);
+
+ LeakyReluParams params{};
+ params.alpha = node->alpha();
+
+ return std::make_unique<kernels::LeakyRelu>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Less.cpp b/compiler/luci-interpreter/src/loader/nodes/Less.cpp
new file mode 100644
index 000000000..81156f275
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Less.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Less.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLess(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleLess *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Less>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp
new file mode 100644
index 000000000..82141e5ae
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LessEqual.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLessEqual(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleLessEqual *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LessEqual>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp b/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp
new file mode 100644
index 000000000..a12dce0a0
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LocalResponseNormalization.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel>
+build_kernel_CircleLocalResponseNormalization(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleLocalResponseNormalization *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 1);
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = node->radius();
+ params.bias = node->bias();
+ params.alpha = node->alpha();
+ params.beta = node->beta();
+
+ return std::make_unique<kernels::LocalResponseNormalization>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp b/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp
new file mode 100644
index 000000000..6cf547aae
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogSoftmax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogSoftmax(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleLogSoftmax *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->logits());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LogSoftmax>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp
new file mode 100644
index 000000000..2c9549f71
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalAnd.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogicalAnd(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleLogicalAnd *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LogicalAnd>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp
new file mode 100644
index 000000000..3d327d6c4
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalNot.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogicalNot(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleLogicalNot *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LogicalNot>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp
new file mode 100644
index 000000000..50566bb30
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalOr.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogicalOr(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleLogicalOr *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LogicalOr>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp b/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp
new file mode 100644
index 000000000..e4160edb3
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Logistic.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogistic(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleLogistic *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Logistic>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp
new file mode 100644
index 000000000..914f22838
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/MaxPool2D.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMaxPool2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleMaxPool2D *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->value());
+ Tensor *output = helper.getOutputTensor(node);
+
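+  // Pooling geometry (padding mode, filter and stride sizes) is copied directly
+  // from the Circle node attributes.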
+ Pool2DParams params{};
+ params.padding = node->padding();
+ params.filter_height = node->filter()->h();
+ params.filter_width = node->filter()->w();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::MaxPool2D>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp b/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp
new file mode 100644
index 000000000..dc50d6773
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Maximum.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMaximum(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleMaximum *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Maximum>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Mean.cpp b/compiler/luci-interpreter/src/loader/nodes/Mean.cpp
new file mode 100644
index 000000000..97d91207f
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Mean.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Mean.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMean(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleMean *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *axes = helper.getInputTensor(node->reduction_indices());
+ Tensor *output = helper.getOutputTensor(node);
+
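+  // The Mean kernel needs scratch tensors (temp_index, resolved_axes, temp_sum).
+  // They are created without data buffers, marked non-observable, and handed to
+  // the node's runtime graph, which takes ownership of them.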
+ auto temp_index_unique =
+ std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+ temp_index_unique->set_observable(false);
+ temp_index_unique->set_data_buffer(nullptr);
+ Tensor *temp_index =
+ helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_index_unique));
+
+ auto resolved_axes_unique =
+ std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+ resolved_axes_unique->set_observable(false);
+ resolved_axes_unique->set_data_buffer(nullptr);
+ Tensor *resolved_axes =
+ helper.getRuntimeGraph(node->graph())->addTensor(std::move(resolved_axes_unique));
+
+ auto temp_sum_unique =
+ std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, "");
+ temp_sum_unique->set_observable(false);
+ temp_sum_unique->set_data_buffer(nullptr);
+ Tensor *temp_sum = helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_sum_unique));
+
+ ReducerParams params{};
+ params.keep_dims = node->keep_dims();
+
+ return std::make_unique<kernels::Mean>(input, axes, output, temp_index, resolved_axes, temp_sum,
+ params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp b/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp
new file mode 100644
index 000000000..ff659524a
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Minimum.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMinimum(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleMinimum *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Minimum>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp b/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp
new file mode 100644
index 000000000..ebf294583
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/MirrorPad.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMirrorPad(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleMirrorPad *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *paddings = helper.getInputTensor(node->paddings());
+ Tensor *output = helper.getOutputTensor(node);
+
+ MirrorPadParams params{};
+ params.mode = node->mode();
+
+ return std::make_unique<kernels::MirrorPad>(input, paddings, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Mul.cpp b/compiler/luci-interpreter/src/loader/nodes/Mul.cpp
new file mode 100644
index 000000000..4f9da967d
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Mul.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Mul.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMul(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleMul *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ MulParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Mul>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Neg.cpp b/compiler/luci-interpreter/src/loader/nodes/Neg.cpp
new file mode 100644
index 000000000..23c00537b
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Neg.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Neg.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleNeg(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleNeg *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Neg>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp
new file mode 100644
index 000000000..8e5711fc1
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/NotEqual.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleNotEqual(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleNotEqual *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::NotEqual>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp b/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp
new file mode 100644
index 000000000..e31601bf6
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/PRelu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePRelu(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CirclePRelu *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *alpha = helper.getInputTensor(node->alpha());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::PRelu>(input, alpha, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Pack.cpp b/compiler/luci-interpreter/src/loader/nodes/Pack.cpp
new file mode 100644
index 000000000..699472081
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Pack.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pack.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePack(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CirclePack *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == node->values_count());
+
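+  // Pack is variadic: gather all value tensors in declaration order before
+  // constructing the kernel.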
+ std::vector<const Tensor *> inputs(node->values_count());
+ for (uint32_t i = 0; i < node->values_count(); ++i)
+ {
+ inputs[i] = helper.getInputTensor(node->values(i));
+ }
+ Tensor *output = helper.getOutputTensor(node);
+
+ PackParams params{};
+ params.axis = node->axis();
+ params.values_count = node->values_count();
+
+ return std::make_unique<kernels::Pack>(std::move(inputs), output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Pad.cpp b/compiler/luci-interpreter/src/loader/nodes/Pad.cpp
new file mode 100644
index 000000000..770549295
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Pad.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pad.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePad(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CirclePad *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *paddings = helper.getInputTensor(node->paddings());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Pad>(input, paddings, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp b/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp
new file mode 100644
index 000000000..12deb15f0
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/PadV2.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePadV2(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CirclePadV2 *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *paddings = helper.getInputTensor(node->paddings());
+ const Tensor *constant_values = helper.getInputTensor(node->constant_values());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::PadV2>(input, paddings, constant_values, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Pow.cpp b/compiler/luci-interpreter/src/loader/nodes/Pow.cpp
new file mode 100644
index 000000000..b430bc94f
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Pow.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pow.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePow(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CirclePow *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Pow>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Relu.cpp b/compiler/luci-interpreter/src/loader/nodes/Relu.cpp
new file mode 100644
index 000000000..d53a66a06
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Relu.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Relu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleRelu(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleRelu *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->features());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Relu>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp b/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp
new file mode 100644
index 000000000..f1b5d219b
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Relu6.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleRelu6(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleRelu6 *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->features());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Relu6>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp b/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp
new file mode 100644
index 000000000..89e3ecebf
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Reshape.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleReshape(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleReshape *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->tensor());
+ const Tensor *shape = helper.getInputTensor(node->shape());
+ Tensor *output = helper.getOutputTensor(node);
+
+  // NOTE The 'newShape' attribute is ignored; the shape comes from the 'shape' input tensor.
+ return std::make_unique<kernels::Reshape>(input, shape, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp b/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp
new file mode 100644
index 000000000..dca56588d
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ResizeBilinear.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleResizeBilinear(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleResizeBilinear *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *size = helper.getInputTensor(node->size());
+ Tensor *output = helper.getOutputTensor(node);
+
+ ResizeBilinearParams params{};
+ params.align_corners = node->align_corners();
+ params.half_pixel_centers = node->half_pixel_centers();
+
+ return std::make_unique<kernels::ResizeBilinear>(input, size, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp b/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp
new file mode 100644
index 000000000..d1ea19c0f
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ResizeNearestNeighbor.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel>
+build_kernel_CircleResizeNearestNeighbor(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleResizeNearestNeighbor *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *size = helper.getInputTensor(node->size());
+ Tensor *output = helper.getOutputTensor(node);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = node->align_corners();
+  // TODO Update half_pixel_centers once CircleResizeNearestNeighbor supports it.
+  // The current CircleResizeNearestNeighbor node does not expose a
+  // half_pixel_centers attribute, so the parameter defaults to false here and
+  // needs to be revisited when the node is extended.
+ params.half_pixel_centers = false;
+
+ return std::make_unique<kernels::ResizeNearestNeighbor>(input, size, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp b/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp
new file mode 100644
index 000000000..ea00f5408
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ReverseV2.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleReverseV2(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleReverseV2 *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->tensor());
+ const Tensor *axes = helper.getInputTensor(node->axis());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::ReverseV2>(input, axes, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp b/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp
new file mode 100644
index 000000000..ff87f435c
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Rsqrt.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleRsqrt(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleRsqrt *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Rsqrt>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Slice.cpp b/compiler/luci-interpreter/src/loader/nodes/Slice.cpp
new file mode 100644
index 000000000..741cd0806
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Slice.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Slice.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSlice(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleSlice *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *begin = helper.getInputTensor(node->begin());
+ const Tensor *size = helper.getInputTensor(node->size());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Slice>(input, begin, size, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp b/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp
new file mode 100644
index 000000000..b15e4b6f3
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Softmax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSoftmax(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleSoftmax *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->logits());
+ Tensor *output = helper.getOutputTensor(node);
+
+ SoftmaxParams params{};
+ params.beta = node->beta();
+
+ return std::make_unique<kernels::Softmax>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp b/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp
new file mode 100644
index 000000000..91c237aa5
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SpaceToBatchND.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSpaceToBatchND(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleSpaceToBatchND *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *block_shape = helper.getInputTensor(node->block_shape());
+ const Tensor *paddings = helper.getInputTensor(node->paddings());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::SpaceToBatchND>(input, block_shape, paddings, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp b/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp
new file mode 100644
index 000000000..3cbbd9718
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SpaceToDepth.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSpaceToDepth(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleSpaceToDepth *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 1);
+ const Tensor *input = helper.getInputTensor(node->input());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ SpaceToDepthParams params{};
+ params.block_size = node->block_size();
+
+ return std::make_unique<kernels::SpaceToDepth>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Split.cpp b/compiler/luci-interpreter/src/loader/nodes/Split.cpp
new file mode 100644
index 000000000..32553ad5e
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Split.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Split.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSplit(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleSplit *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ auto output_nodes = collectOutputNodes<luci::CircleSplitOut>(node);
+ assert(node->arity() == 2);
+ assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
+
+ const Tensor *axis = helper.getInputTensor(node->split_dim());
+ const Tensor *input = helper.getInputTensor(node->input());
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ // NOTE 'num_splits' attribute is ignored.
+ return std::make_unique<kernels::Split>(axis, input, std::move(outputs));
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp b/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp
new file mode 100644
index 000000000..d78816447
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SplitV.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSplitV(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleSplitV *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ auto output_nodes = collectOutputNodes<luci::CircleSplitVOut>(node);
+ assert(node->arity() == 3);
+ assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *sizes_data = helper.getInputTensor(node->size_splits());
+ const Tensor *axis = helper.getInputTensor(node->split_dim());
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ // NOTE 'num_splits' attribute is ignored.
+ return std::make_unique<kernels::SplitV>(input, sizes_data, axis, std::move(outputs));
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp b/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp
new file mode 100644
index 000000000..56dd986f1
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Sqrt.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSqrt(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleSqrt *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Sqrt>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Square.cpp b/compiler/luci-interpreter/src/loader/nodes/Square.cpp
new file mode 100644
index 000000000..43aadb969
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Square.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Square.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSquare(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleSquare *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Square>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp b/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp
new file mode 100644
index 000000000..6a2717aa2
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SquaredDifference.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSquaredDifference(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleSquaredDifference *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::SquaredDifference>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp b/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp
new file mode 100644
index 000000000..583ff9314
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Squeeze.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSqueeze(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleSqueeze *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ SqueezeParams params{};
+ params.squeeze_dims = node->squeeze_dims();
+
+ return std::make_unique<kernels::Squeeze>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp b/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp
new file mode 100644
index 000000000..fe5fa7707
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/StridedSlice.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleStridedSlice(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleStridedSlice *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 4);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *begin = helper.getInputTensor(node->begin());
+ const Tensor *end = helper.getInputTensor(node->end());
+ const Tensor *strides = helper.getInputTensor(node->strides());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ StridedSliceParams params{};
+ params.begin_mask = node->begin_mask();
+ params.ellipsis_mask = node->ellipsis_mask();
+ params.end_mask = node->end_mask();
+ params.new_axis_mask = node->new_axis_mask();
+ params.shrink_axis_mask = node->shrink_axis_mask();
+
+ return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Sub.cpp b/compiler/luci-interpreter/src/loader/nodes/Sub.cpp
new file mode 100644
index 000000000..bad4fbb13
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Sub.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Sub.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSub(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleSub *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ SubParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Sub>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp b/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp
new file mode 100644
index 000000000..f4255291b
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Tanh.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleTanh(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleTanh *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Tanh>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp b/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp
new file mode 100644
index 000000000..4e095fbbc
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Transpose.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleTranspose(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleTranspose *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->a());
+ const Tensor *perm = helper.getInputTensor(node->perm());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Transpose>(input, perm, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp b/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp
new file mode 100644
index 000000000..1b954c35c
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/TransposeConv.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleTransposeConv(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleTransposeConv *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ assert(node->arity() == 4);
+
+ const Tensor *input_sizes = helper.getInputTensor(node->inputSizes());
+ const Tensor *filter = helper.getInputTensor(node->filter());
+ const Tensor *out_backprop = helper.getInputTensor(node->outBackprop());
+ const Tensor *bias = helper.getOptionalInputTensor(node->bias());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ DataType scratch_data_type =
+ helper.getInputTensor(node)->element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+
+ auto scratch_tensor =
+ std::make_unique<Tensor>(scratch_data_type, Shape({}), AffineQuantization{}, "");
+ scratch_tensor->set_observable(false);
+ scratch_tensor->set_data_buffer(nullptr);
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratch_tensor));
+
+ TransposeConvParams params{};
+ params.padding = node->padding();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+
+ return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, bias, output,
+ tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp b/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp
new file mode 100644
index 000000000..978c738c6
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Unpack.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleUnpack(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleUnpack *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+ auto output_nodes = collectOutputNodes<luci::CircleUnpackOut>(node);
+ assert(node->arity() == 1);
+ assert(output_nodes.size() == static_cast<size_t>(node->num()));
+
+ const Tensor *input = helper.getInputTensor(node->value());
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ UnpackParams params{};
+ params.axis = node->axis();
+
+ // NOTE 'num' attribute is ignored.
+ return std::make_unique<kernels::Unpack>(input, std::move(outputs), params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/While.cpp b/compiler/luci-interpreter/src/loader/nodes/While.cpp
new file mode 100644
index 000000000..284dc0c68
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/While.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/While.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleWhile(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = dynamic_cast<const luci::CircleWhile *>(circle_node);
+ if (node == nullptr)
+ throw std::runtime_error("wrong builder for operation");
+
+ auto output_nodes = collectOutputNodes<luci::CircleWhileOut>(node);
+ assert(node->arity() == node->input_count());
+ assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
+
+ std::vector<const Tensor *> inputs(node->input_count());
+ for (uint32_t i = 0; i < node->input_count(); ++i)
+ {
+ inputs[i] = helper.getInputTensor(node->input(i));
+ }
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ RuntimeGraph *cond_graph = helper.getRuntimeGraph(node->cond_graph());
+ RuntimeGraph *body_graph = helper.getRuntimeGraph(node->body_graph());
+
+ return std::make_unique<kernels::While>(std::move(inputs), std::move(outputs), cond_graph,
+ body_graph);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/CMakeLists.txt b/compiler/luci-micro/CMakeLists.txt
new file mode 100644
index 000000000..d936e12ba
--- /dev/null
+++ b/compiler/luci-micro/CMakeLists.txt
@@ -0,0 +1,57 @@
+set(ARM_C_COMPILER "arm-none-eabi-gcc")
+set(ARM_ASM_COMPILER "arm-none-eabi-gcc")
+set(ARM_CXX_COMPILER "arm-none-eabi-g++")
+set(ARM_OBJCOPY "arm-none-eabi-objcopy")
+
+find_program(ARM_C_COMPILER_PATH ${ARM_C_COMPILER})
+
+if(NOT ARM_C_COMPILER_PATH)
+ message(WARNING "ARM compiler is NOT FOUND, skipping luci-micro build")
+ return()
+endif()
+
+set(CMAKE_ARM_OPTIONS
+ -DLUCI_INTERPRETER_STATIC=ON
+ -DLUCI_STATIC=ON
+ "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_CURRENT_SOURCE_DIR}/standalone/Toolchain.cmake"
+ "-DLUCI_INTERPRETER_PAL_DIR=${CMAKE_CURRENT_SOURCE_DIR}/../luci-interpreter/pal/mcu"
+ "-DNNAS_PROJECT_SOURCE_DIR=${NNAS_PROJECT_SOURCE_DIR}"
+ "-DNNAS_EXTERNALS_DIR=${NNAS_EXTERNALS_DIR}"
+ -DCPU_ARCH=arm
+ -DC_COMPILER=${ARM_C_COMPILER}
+ -DCXX_COMPILER=${ARM_CXX_COMPILER}
+ -DASM_COMPILER=${ARM_ASM_COMPILER}
+ -DOBJCOPY=${ARM_OBJCOPY}
+ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+ -DENABLE_TEST=OFF
+ -DBUILD_GTEST=OFF
+ "-DNNAS_ROOT=${NNAS_PROJECT_SOURCE_DIR}"
+ -DENABLE_STRICT_BUILD=OFF
+)
+
+set(MICRO_ARM_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/standalone_arm")
+file(MAKE_DIRECTORY "${MICRO_ARM_BUILD_DIR}")
+
+set(MICRO_ARM_BUILD_DEPENDENCY "${MICRO_ARM_BUILD_DIR}/CMakeCache.txt")
+
+add_custom_command(
+ OUTPUT "${MICRO_ARM_BUILD_DEPENDENCY}"
+ COMMAND "${CMAKE_COMMAND}" "${CMAKE_CURRENT_SOURCE_DIR}/standalone" ${CMAKE_ARM_OPTIONS}
+ WORKING_DIRECTORY "${MICRO_ARM_BUILD_DIR}"
+ DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/standalone/CMakeLists.txt"
+ VERBATIM
+)
+
+add_custom_target(luci_interpreter_micro_arm_cmake DEPENDS "${MICRO_ARM_BUILD_DEPENDENCY}")
+
+set(MICRO_ARM_BINARY "${MICRO_ARM_BUILD_DIR}/compiler/luci-interpreter/src/libluci_interpreter.a")
+
+add_custom_command(
+ OUTPUT "${MICRO_ARM_BINARY}"
+ COMMAND "${CMAKE_MAKE_PROGRAM}" luci_interpreter -j ${CPU_COUNT}
+ WORKING_DIRECTORY "${MICRO_ARM_BUILD_DIR}"
+ DEPENDS luci_interpreter_micro_arm_cmake
+ VERBATIM
+)
+
+add_custom_target(luci_interpreter_micro_arm DEPENDS "${MICRO_ARM_BINARY}")
diff --git a/compiler/luci-micro/README.md b/compiler/luci-micro/README.md
new file mode 100644
index 000000000..6641ad7a7
--- /dev/null
+++ b/compiler/luci-micro/README.md
@@ -0,0 +1,56 @@
+# luci-micro
+
+`luci-micro` is an MCU-specialized build of luci-interpreter, together with several benchmark applications.
+
+## Contents
+
+`luci-micro` contains the CMake infrastructure to build:
+- a stand-alone interpreter library
+- benchmark applications that run luci-interpreter on Arm MCUs
+
+## How to build the stand-alone library
+
+The stand-alone library is built by the `luci_interpreter_micro_arm` target.
+The resulting library is placed in `<ONE root>/build/compiler/luci-micro/standalone_arm/luci-interpreter/src/libluci_interpreter.a`.
+
+### Prerequisites
+
+- Everything you need for the ONE project: see [how-to-build-compiler.md](../../docs/howto/how-to-build-compiler.md)
+- The arm-none-eabi-gcc and arm-none-eabi-g++ cross compilers
+
+To install the required Arm compilers on Ubuntu:
+```
+$ sudo apt-get install gcc-arm-none-eabi
+```
+
+**cmake build**
+
+``` bash
+$ cd <path to ONE>
+$ mkdir build
+$ cd build
+$ cmake ../infra/nncc
+$ make -j$(nproc) luci_interpreter_micro_arm
+```
+
+**nncc script build**
+
+``` bash
+$ cd <path to ONE>
+$ ./nncc configure
+$ ./nncc build -j$(nproc) luci_interpreter_micro_arm
+```
+
+### Known issues
+
+The interpreter uses TensorFlow headers that produce warnings.
+
+The Linux x86 build uses the `-isystem` flag to suppress warnings coming from these external headers,
+but some older Arm compilers have issues with it:
+[bug](https://bugs.launchpad.net/gcc-arm-embedded/+bug/1698539)
+
+The `-isystem` workaround is therefore disabled for the MCU build; as a consequence, the MCU build breaks if the `-Werror` flag is set.
+
+## How to use
+
+TBD
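
As a rough, non-authoritative sketch of what using the resulting `libluci_interpreter.a` could look like, the snippet below assumes the `luci_interpreter::Interpreter` API declared under `compiler/luci-interpreter/include` (a constructor taking a `luci::Module *`, plus `writeInputTensor` / `interpret` / `readOutputTensor`) and a model that has already been imported into a `luci::Module`; the function name and output size are illustrative only:

```cpp
// Sketch only: single-input / single-output model, error handling omitted.
#include <luci_interpreter/Interpreter.h>
#include <luci/IR/CircleNodes.h>
#include <luci/IR/Module.h>
#include <loco.h>

#include <vector>

std::vector<float> run_once(const luci::Module *module, const std::vector<float> &input_data,
                            size_t output_elements)
{
  luci_interpreter::Interpreter interpreter(module);

  // Feed the first graph input
  const auto input_nodes = loco::input_nodes(module->graph());
  const auto *input_node = dynamic_cast<const luci::CircleInput *>(input_nodes.at(0));
  interpreter.writeInputTensor(input_node, input_data.data(), input_data.size() * sizeof(float));

  interpreter.interpret();

  // Read the first graph output back
  const auto output_nodes = loco::output_nodes(module->graph());
  const auto *output_node = dynamic_cast<const luci::CircleOutput *>(output_nodes.at(0));
  std::vector<float> output_data(output_elements);
  interpreter.readOutputTensor(output_node, output_data.data(),
                               output_data.size() * sizeof(float));
  return output_data;
}
```

The exact entry points may differ; the headers in `compiler/luci-interpreter/include` are the source of truth.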
diff --git a/compiler/luci-micro/requires.cmake b/compiler/luci-micro/requires.cmake
new file mode 100644
index 000000000..5913aa9ad
--- /dev/null
+++ b/compiler/luci-micro/requires.cmake
@@ -0,0 +1 @@
+require(luci-interpreter)
diff --git a/compiler/luci-micro/standalone/CMakeLists.txt b/compiler/luci-micro/standalone/CMakeLists.txt
new file mode 100644
index 000000000..7953359ad
--- /dev/null
+++ b/compiler/luci-micro/standalone/CMakeLists.txt
@@ -0,0 +1,20 @@
+cmake_minimum_required(VERSION 3.10)
+project(luci_interpreter_micro_standalone)
+
+# Add a fake target so that nothing is built
+set(BUILD_WHITELIST "dummy")
+
+add_subdirectory(${NNAS_ROOT}/infra/nncc ${CMAKE_CURRENT_BINARY_DIR}/nncc)
+
+set(ONE_COMPILER_SRC_DIR "${NNAS_PROJECT_SOURCE_DIR}/compiler")
+
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/loco ${CMAKE_CURRENT_BINARY_DIR}/loco)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/angkor ${CMAKE_CURRENT_BINARY_DIR}/angkor)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/oops ${CMAKE_CURRENT_BINARY_DIR}/oops)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/pepper-str ${CMAKE_CURRENT_BINARY_DIR}/pepper-str)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/logo ${CMAKE_CURRENT_BINARY_DIR}/logo)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/logo-core ${CMAKE_CURRENT_BINARY_DIR}/logo-core)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/locomotiv ${CMAKE_CURRENT_BINARY_DIR}/locomotiv)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/lang ${CMAKE_CURRENT_BINARY_DIR}/luci/lang)
+
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci-interpreter ${CMAKE_CURRENT_BINARY_DIR}/luci-interpreter)
diff --git a/compiler/luci-micro/standalone/Toolchain.cmake b/compiler/luci-micro/standalone/Toolchain.cmake
new file mode 100644
index 000000000..2d23b5de5
--- /dev/null
+++ b/compiler/luci-micro/standalone/Toolchain.cmake
@@ -0,0 +1,8 @@
+set(CMAKE_SYSTEM_NAME Generic)
+
+set(CMAKE_SYSTEM_PROCESSOR "${CPU_ARCH}")
+set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+set(CMAKE_C_COMPILER "${C_COMPILER}")
+set(CMAKE_CXX_COMPILER "${CXX_COMPILER}")
+set(CMAKE_ASM_COMPILER "${ASM_COMPILER}")
+set(CMAKE_OBJCOPY "${OBJCOPY}")
diff --git a/compiler/luci-pass-value-test/CMakeLists.txt b/compiler/luci-pass-value-test/CMakeLists.txt
index 2d2befe57..b31415870 100644
--- a/compiler/luci-pass-value-test/CMakeLists.txt
+++ b/compiler/luci-pass-value-test/CMakeLists.txt
@@ -38,7 +38,7 @@ add_test(NAME luci_pass_value_test
COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/eval_driver.sh"
"${CMAKE_CURRENT_BINARY_DIR}"
"${ARTIFACTS_BIN_PATH}"
- "${NNCC_OVERLAY_DIR}/venv_2_3_0"
+ "${NNCC_OVERLAY_DIR}/venv_2_6_0"
"$<TARGET_FILE:luci_eval_driver>"
${LUCI_PASS_VALUE_TESTS}
)
diff --git a/compiler/luci-value-test/CMakeLists.txt b/compiler/luci-value-test/CMakeLists.txt
index 124f120d4..3c7185b80 100644
--- a/compiler/luci-value-test/CMakeLists.txt
+++ b/compiler/luci-value-test/CMakeLists.txt
@@ -18,7 +18,7 @@ add_test(NAME luci_value_test
COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverify.sh"
"${CMAKE_CURRENT_BINARY_DIR}"
"${ARTIFACTS_BIN_PATH}"
- "${NNCC_OVERLAY_DIR}/venv_2_3_0"
+ "${NNCC_OVERLAY_DIR}/venv_2_6_0"
"$<TARGET_FILE:luci_eval_driver>"
${LUCI_VALUE_TESTS}
)
diff --git a/compiler/luci-value-test/README.md b/compiler/luci-value-test/README.md
index 90e92834b..6f1d0d54f 100644
--- a/compiler/luci-value-test/README.md
+++ b/compiler/luci-value-test/README.md
@@ -5,11 +5,15 @@
The test proceeds as follows
Step 1: Generate tflite files and circle files from TFLite recipes (listed in test.lst).
+```
"TFLite recipe" -> tflchef -> "tflite file" -> tflite2circle -> "circle file"
+```
Step 2: Run TFLite interpreter and luci-interpreter for the generated tflite and circle, respectively.
(with the same input tensors filled with random values)
+```
circle file -> luci-interpreter -------> Execution result 1
tflite file -> TFLite interpreter -----> Execution result 2
+```
Step 3: Compare the execution result 1 and 2. The result must be the same.
diff --git a/compiler/luci-value-test/luci_eval_verifier.py b/compiler/luci-value-test/luci_eval_verifier.py
index f6b0620d8..a76bd1403 100755
--- a/compiler/luci-value-test/luci_eval_verifier.py
+++ b/compiler/luci-value-test/luci_eval_verifier.py
@@ -64,41 +64,23 @@ for idx in range(len(interpreter.get_output_details())):
shape_file = open(circle_model + ".output" + str(idx) + ".shape", 'r')
output_shape = [int(i) for i in shape_file.read().split(',')]
luci_output_data = np.reshape(output_data, output_shape)
+ intp_output_data = interpreter.get_tensor(output_details["index"])
try:
if output_details["dtype"] == np.uint8:
- if np.allclose(
- luci_output_data,
- interpreter.get_tensor(
- interpreter.get_output_details()[idx]["index"]),
- rtol=0,
- atol=0) == False:
+ if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
elif output_details["dtype"] == np.float32:
if np.allclose(
- luci_output_data,
- interpreter.get_tensor(
- interpreter.get_output_details()[idx]["index"]),
- rtol=1.e-5,
- atol=1.e-5) == False:
+ luci_output_data, intp_output_data, rtol=1.e-5, atol=1.e-5) == False:
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
elif output_details["dtype"] == np.int64:
- if np.allclose(
- luci_output_data,
- interpreter.get_tensor(
- interpreter.get_output_details()[idx]["index"]),
- rtol=0,
- atol=0) == False:
+ if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
elif output_details["dtype"] == np.int32:
- if np.allclose(
- luci_output_data,
- interpreter.get_tensor(
- interpreter.get_output_details()[idx]["index"]),
- rtol=0,
- atol=0) == False:
+ if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
raise SystemExit("Execution result of " + tflite_model +
" does not match with " + circle_model)
else:
diff --git a/compiler/luci/CMakeLists.txt b/compiler/luci/CMakeLists.txt
index 95c349c0d..9dcf1b55d 100644
--- a/compiler/luci/CMakeLists.txt
+++ b/compiler/luci/CMakeLists.txt
@@ -1,3 +1,14 @@
+# Some targets do not support dynamic linking: MCU, TrustZone applications, etc.
+# The STATIC_LUCI option allows us to compile luci and luci-related components safely
+# and to suppress various CMake warnings.
+#
+# Currently this feature is used for luci-interpreter MCU builds.
+if (STATIC_LUCI)
+ set(LIBRARY_TYPE "STATIC")
+else()
+ set(LIBRARY_TYPE "SHARED")
+endif()
+
add_subdirectory(env)
add_subdirectory(log)
add_subdirectory(lang)
@@ -6,6 +17,7 @@ add_subdirectory(testhelper)
add_subdirectory(service)
add_subdirectory(pass)
add_subdirectory(profile)
+add_subdirectory(plan)
add_subdirectory(partition)
add_subdirectory(import)
add_subdirectory(export)
diff --git a/compiler/luci/env/CMakeLists.txt b/compiler/luci/env/CMakeLists.txt
index 4d1a89ad1..bba515551 100644
--- a/compiler/luci/env/CMakeLists.txt
+++ b/compiler/luci/env/CMakeLists.txt
@@ -2,7 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
-add_library(luci_env SHARED ${SOURCES})
+if (NOT LIBRARY_TYPE)
+ set(LIBRARY_TYPE "SHARED")
+endif(NOT LIBRARY_TYPE)
+
+add_library(luci_env ${LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_env PUBLIC include)
target_link_libraries(luci_env PRIVATE nncc_common)
install(TARGETS luci_env DESTINATION lib)
diff --git a/compiler/luci/env/include/luci/UserSettings.h b/compiler/luci/env/include/luci/UserSettings.h
index b56bd65e2..9fe9592e5 100644
--- a/compiler/luci/env/include/luci/UserSettings.h
+++ b/compiler/luci/env/include/luci/UserSettings.h
@@ -33,6 +33,7 @@ struct UserSettings
MuteWarnings,
DisableValidation,
ProfilingDataGen,
+ ExecutionPlanGen,
};
static UserSettings *settings();
diff --git a/compiler/luci/env/src/UserSettings.cpp b/compiler/luci/env/src/UserSettings.cpp
index b4c661190..136fee799 100644
--- a/compiler/luci/env/src/UserSettings.cpp
+++ b/compiler/luci/env/src/UserSettings.cpp
@@ -31,6 +31,7 @@ private:
bool _MuteWarnings{false};
bool _DisableValidation{false};
bool _ProfilingDataGen{false};
+ bool _ExecutionPlanGen{false};
};
void UserSettingsImpl::set(const Key key, bool value)
@@ -46,6 +47,9 @@ void UserSettingsImpl::set(const Key key, bool value)
case Key::ProfilingDataGen:
_ProfilingDataGen = value;
break;
+ case Key::ExecutionPlanGen:
+ _ExecutionPlanGen = value;
+ break;
default:
throw std::runtime_error("Invalid key in boolean set");
break;
@@ -62,6 +66,8 @@ bool UserSettingsImpl::get(const Key key) const
return _DisableValidation;
case Key::ProfilingDataGen:
return _ProfilingDataGen;
+ case Key::ExecutionPlanGen:
+ return _ExecutionPlanGen;
default:
throw std::runtime_error("Invalid key in boolean get");
break;
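
For reference, the new key is consumed the same way as the existing ones; a minimal, hypothetical driver snippet (the helper function name is illustrative, the `UserSettings` calls mirror the tests below) would be:

```cpp
#include <luci/UserSettings.h>

// Hypothetical helper: opt in to execution-plan metadata generation
// before exporting a circle model.
void enable_execution_plan_metadata()
{
  auto settings = luci::UserSettings::settings();
  settings->set(luci::UserSettings::Key::ExecutionPlanGen, true);
  // Later in this patch, CircleExportMetadata emits "ONE_execution_plan_table"
  // only when this key is set.
}
```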
diff --git a/compiler/luci/env/src/UserSettings.test.cpp b/compiler/luci/env/src/UserSettings.test.cpp
index 899c0c2a1..26c606edb 100644
--- a/compiler/luci/env/src/UserSettings.test.cpp
+++ b/compiler/luci/env/src/UserSettings.test.cpp
@@ -39,6 +39,18 @@ TEST(UserSettings, MuteWarnings)
ASSERT_TRUE(settings->get(luci::UserSettings::Key::MuteWarnings));
}
+TEST(UserSettings, MuteWarnings_NEG)
+{
+ auto settings = luci::UserSettings::settings();
+ ASSERT_NE(nullptr, settings);
+
+ settings->set(luci::UserSettings::Key::MuteWarnings, false);
+ ASSERT_FALSE(settings->get(luci::UserSettings::Key::MuteWarnings));
+
+ settings->set(luci::UserSettings::Key::MuteWarnings, true);
+ ASSERT_FALSE(settings->get(luci::UserSettings::Key::DisableValidation));
+}
+
TEST(UserSettings, DisableValidation)
{
auto settings = luci::UserSettings::settings();
@@ -51,6 +63,18 @@ TEST(UserSettings, DisableValidation)
ASSERT_TRUE(settings->get(luci::UserSettings::Key::DisableValidation));
}
+TEST(UserSettings, DisableValidation_NEG)
+{
+ auto settings = luci::UserSettings::settings();
+ ASSERT_NE(nullptr, settings);
+
+ settings->set(luci::UserSettings::Key::DisableValidation, false);
+ ASSERT_FALSE(settings->get(luci::UserSettings::Key::DisableValidation));
+
+ settings->set(luci::UserSettings::Key::DisableValidation, true);
+ ASSERT_FALSE(settings->get(luci::UserSettings::Key::ProfilingDataGen));
+}
+
TEST(UserSettings, ProfilingDataGen)
{
auto settings = luci::UserSettings::settings();
diff --git a/compiler/luci/export/CMakeLists.txt b/compiler/luci/export/CMakeLists.txt
index 5c0077625..2b41a6248 100644
--- a/compiler/luci/export/CMakeLists.txt
+++ b/compiler/luci/export/CMakeLists.txt
@@ -3,7 +3,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
#file(GLOB_RECURSE TESTS "src/*.test.cpp")
#list(REMOVE_ITEM SOURCES ${TESTS})
-add_library(luci_export SHARED ${SOURCES})
+if (NOT LIBRARY_TYPE)
+ set(LIBRARY_TYPE "SHARED")
+endif(NOT LIBRARY_TYPE)
+
+add_library(luci_export ${LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_export PRIVATE src)
target_include_directories(luci_export PUBLIC include)
target_link_libraries(luci_export PRIVATE luci_lang)
@@ -14,6 +18,7 @@ target_link_libraries(luci_export PRIVATE luci_env)
target_link_libraries(luci_export PRIVATE luci_log)
target_link_libraries(luci_export PRIVATE luci_logex)
target_link_libraries(luci_export PRIVATE luci_profile)
+target_link_libraries(luci_export PRIVATE luci_plan)
target_link_libraries(luci_export PRIVATE nncc_common)
target_link_libraries(luci_export PRIVATE locop)
target_link_libraries(luci_export PRIVATE oops)
diff --git a/compiler/luci/export/src/CircleExportMetadata.cpp b/compiler/luci/export/src/CircleExportMetadata.cpp
index ef905a882..017002f5c 100644
--- a/compiler/luci/export/src/CircleExportMetadata.cpp
+++ b/compiler/luci/export/src/CircleExportMetadata.cpp
@@ -44,6 +44,31 @@ flatbuffers::Offset<circle::Metadata> metadata_offset(flatbuffers::FlatBufferBui
namespace luci
{
+// 'execution_plan_table' is encoded to binary format.
+const std::vector<uint8_t> CircleExportMetadata::encoded_execution_plan_table()
+{
+ std::vector<uint8_t> data;
+
+ write_u32(data, _execution_plan_table.size());
+
+ for (auto &kv : _execution_plan_table)
+ {
+ const auto id = kv.first;
+ write_u32(data, id);
+
+ const auto plan_vector = kv.second;
+ const auto size = plan_vector.size();
+ write_u32(data, size);
+
+ for (auto elem : plan_vector)
+ {
+ write_u32(data, elem);
+ }
+ }
+
+ return data;
+}
+
// 'source_table' is encoded to binary format.
const std::vector<uint8_t> CircleExportMetadata::encoded_source_table(void)
{
@@ -114,7 +139,11 @@ createCircleMetadataVector(flatbuffers::FlatBufferBuilder &builder, luci::Serial
metadata_vec.emplace_back(
metadata_offset(builder, md, md._metadata.encoded_op_table(), "ONE_op_table"));
}
-
+ if (settings->get(luci::UserSettings::Key::ExecutionPlanGen))
+ {
+ metadata_vec.emplace_back(metadata_offset(
+ builder, md, md._metadata.encoded_execution_plan_table(), "ONE_execution_plan_table"));
+ }
return metadata_vec;
}
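
To make the new `ONE_execution_plan_table` payload concrete, here is a small self-contained sketch (with made-up node ids and offsets, not part of the patch) that lays out words in the same order as `encoded_execution_plan_table`: the entry count first, then for each node its id, the plan-vector size, and finally the execution order followed by the output-tensor offsets:

```cpp
#include <cstdint>
#include <iostream>
#include <map>
#include <vector>

int main()
{
  // node id -> { execution order, output tensor offsets... } (made-up values)
  std::map<uint32_t, std::vector<uint32_t>> table = {{3, {1, 1024}}, {7, {2, 2048, 4096}}};

  std::vector<uint32_t> words;
  words.push_back(static_cast<uint32_t>(table.size())); // entry count
  for (const auto &kv : table)
  {
    words.push_back(kv.first);                              // node id
    words.push_back(static_cast<uint32_t>(kv.second.size())); // plan vector size
    for (auto v : kv.second)
      words.push_back(v); // order first, then offsets
  }

  // Prints: 2 3 2 1 1024 7 3 2 2048 4096
  for (auto w : words)
    std::cout << w << ' ';
  std::cout << '\n';
  return 0;
}
```

In the real metadata each of these words is serialized to four bytes via `write_u32`, and `decoded_execution_plan` on the import side checks the entry count and per-entry sizes before rebuilding the `ExecutionPlanTable`.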
diff --git a/compiler/luci/export/src/CircleOperationExporter.cpp b/compiler/luci/export/src/CircleOperationExporter.cpp
index 014d9bd61..be64a52d4 100644
--- a/compiler/luci/export/src/CircleOperationExporter.cpp
+++ b/compiler/luci/export/src/CircleOperationExporter.cpp
@@ -22,6 +22,7 @@
#include <luci/IR/CircleNodes.h>
#include <luci/IR/CircleNodeVisitor.h>
#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Plan/CircleNodeExecutionPlan.h>
#include <luci/UserSettings.h>
#include <luci/Log.h>
@@ -1684,7 +1685,7 @@ void OpExporterLet<OE::CIRC>::visit(luci::CircleInstanceNorm *node)
}
void exportNode(loco::Node *node, flatbuffers::FlatBufferBuilder &builder, SerializedModelData &md,
- SerializedGraphData &gd)
+ SerializedGraphData &gd, uint32_t node_position)
{
if (auto circle_node = dynamic_cast<luci::CircleNode *>(node))
{
@@ -1702,6 +1703,19 @@ void exportNode(loco::Node *node, flatbuffers::FlatBufferBuilder &builder, Seria
md._metadata.add_op_table(node_id, source->id());
}
}
+ if (has_execution_plan(circle_node))
+ {
+ // Add this node's execution plan information to the metadata vector, keyed by
+ // node_position: the execution order first, then the offsets of the output tensors.
+ const auto execution_plan = get_execution_plan(circle_node);
+ std::vector<uint32_t> execution_plan_vector;
+ execution_plan_vector.push_back(execution_plan.order_in_plan());
+ for (auto offset : execution_plan.offsets())
+ {
+ execution_plan_vector.push_back(offset);
+ }
+ md._metadata.add_execution_plan_table(node_position, execution_plan_vector);
+ }
}
else
{
@@ -1717,9 +1731,11 @@ namespace luci
void exportNodes(loco::Graph *g, FlatBufferBuilder &builder, SerializedModelData &md,
SerializedGraphData &gd)
{
+ uint32_t node_position = 0;
for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
{
- exportNode(node, builder, md, gd);
+ exportNode(node, builder, md, gd, node_position);
+ node_position++;
}
}
diff --git a/compiler/luci/export/src/SerializedData.h b/compiler/luci/export/src/SerializedData.h
index 95f7b5755..a945eecf7 100644
--- a/compiler/luci/export/src/SerializedData.h
+++ b/compiler/luci/export/src/SerializedData.h
@@ -20,6 +20,7 @@
#include <mio/circle/schema_generated.h>
#include <luci/IR/CircleNodes.h>
+#include <luci/IR/ExecutionPlanTable.h>
#include <vector>
@@ -63,13 +64,23 @@ public:
_op_table.at(node_id).emplace(source_id);
}
+ void add_execution_plan_table(uint32_t node_id,
+ const std::vector<uint32_t> &execution_plan_inform)
+ {
+ _execution_plan_table[node_id] = execution_plan_inform;
+ }
+
public:
const std::vector<uint8_t> encoded_source_table(void);
const std::vector<uint8_t> encoded_op_table(void);
+ const std::vector<uint8_t> encoded_execution_plan_table(void);
private:
std::map<uint32_t, std::string> _source_table;
std::map<uint32_t, std::set<uint32_t>> _op_table;
+ // _execution_plan_table maps a node_id to its execution plan vector:
+ // the execution order first, then the memory offsets of the node's output tensors.
+ luci::ExecutionPlanTable _execution_plan_table;
};
} // namespace luci
diff --git a/compiler/luci/import/CMakeLists.txt b/compiler/luci/import/CMakeLists.txt
index 4e200f6ae..1df569d11 100644
--- a/compiler/luci/import/CMakeLists.txt
+++ b/compiler/luci/import/CMakeLists.txt
@@ -2,11 +2,16 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
-add_library(luci_import SHARED ${SOURCES})
+if (NOT LIBRARY_TYPE)
+ set(LIBRARY_TYPE "SHARED")
+endif(NOT LIBRARY_TYPE)
+
+add_library(luci_import ${LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_import PRIVATE src)
target_include_directories(luci_import PUBLIC include)
target_link_libraries(luci_import PUBLIC luci_lang)
target_link_libraries(luci_import PUBLIC luci_profile)
+target_link_libraries(luci_import PUBLIC luci_plan)
target_link_libraries(luci_import PUBLIC mio_circle)
target_link_libraries(luci_import PRIVATE luci_env)
target_link_libraries(luci_import PRIVATE luci_log)
diff --git a/compiler/luci/import/src/CircleImportMetadata.cpp b/compiler/luci/import/src/CircleImportMetadata.cpp
index f68f3301a..42dcebdaa 100644
--- a/compiler/luci/import/src/CircleImportMetadata.cpp
+++ b/compiler/luci/import/src/CircleImportMetadata.cpp
@@ -134,6 +134,55 @@ decoded_op_table(const std::vector<uint8_t> &op_table_data)
return node_source_ids_map;
}
+// 'execution_plan_table' is decoded to std::map<uint32_t, std::vector<uint32_t>> format.
+const luci::ExecutionPlanTable
+decoded_execution_plan(const std::vector<uint8_t> &execution_plan_data)
+{
+ luci::ExecutionPlanTable execution_plan_table;
+ uint32_t idx = 0;
+
+ if (execution_plan_data.size() < 4)
+ throw std::runtime_error("Op table decode error : invalid entry number");
+
+ uint32_t entry_number = read_u32(execution_plan_data, idx);
+ idx += sizeof(uint32_t);
+
+ while (idx < execution_plan_data.size())
+ {
+ if (idx + 2 * sizeof(uint32_t) > execution_plan_data.size())
+ throw std::runtime_error("Op table decode error : invalid entry item");
+
+ uint32_t id = read_u32(execution_plan_data, idx);
+ idx += sizeof(uint32_t);
+
+ uint32_t size = read_u32(execution_plan_data, idx);
+ idx += sizeof(uint32_t);
+
+ if (idx + sizeof(uint32_t) * size > execution_plan_data.size())
+ throw std::runtime_error("Source table decode error : invalid entry data");
+
+ std::vector<uint32_t> execution_plan_vector;
+ for (uint32_t j = 0; j < size; ++j)
+ {
+ uint32_t execution_plan_inform = read_u32(execution_plan_data, idx);
+ idx += sizeof(uint32_t);
+
+ execution_plan_vector.push_back(execution_plan_inform);
+ }
+
+ if (execution_plan_table.insert({id, execution_plan_vector}).second == false)
+ throw std::runtime_error("Op table decode error : duplicated origin ID");
+ }
+
+ if (idx != execution_plan_data.size())
+ throw std::runtime_error("Op table decode error : data size invalid");
+
+ if (execution_plan_table.size() != entry_number)
+ throw std::runtime_error("Op table decode error : entry number invalid");
+
+ return execution_plan_table;
+}
+
} // namespace
namespace luci
@@ -153,6 +202,8 @@ CircleImportMetadata::CircleImportMetadata(const luci::CircleReader &reader)
_op_table = decoded_op_table(buffer);
else if (meta.name.compare("ONE_source_table") == 0)
_source_table = decoded_source_table(buffer);
+ else if (meta.name.compare("ONE_execution_plan_table") == 0)
+ _execution_plan_table = decoded_execution_plan(buffer);
}
}
diff --git a/compiler/luci/import/src/CircleImportMetadata.h b/compiler/luci/import/src/CircleImportMetadata.h
index 007985dcc..0e0240678 100644
--- a/compiler/luci/import/src/CircleImportMetadata.h
+++ b/compiler/luci/import/src/CircleImportMetadata.h
@@ -20,6 +20,7 @@
#include "luci/Import/CircleReader.h"
#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/IR/ExecutionPlanTable.h>
#include <map>
#include <set>
@@ -47,10 +48,15 @@ public:
const std::map<uint32_t, std::string> &source_table(void) const { return _source_table; }
+ const luci::ExecutionPlanTable &execution_plan_table(void) const { return _execution_plan_table; }
+
private:
// Decoded metadata is stored
std::map<uint32_t, std::string> _source_table;
std::map<uint32_t, std::set<uint32_t>> _op_table;
+ // _execution_plan_table maps a node_id to its execution order
+ // followed by the offsets of its output tensors
+ luci::ExecutionPlanTable _execution_plan_table;
};
} // namespace luci
diff --git a/compiler/luci/import/src/Importer.cpp b/compiler/luci/import/src/Importer.cpp
index 68baefab0..8eae5fcf4 100644
--- a/compiler/luci/import/src/Importer.cpp
+++ b/compiler/luci/import/src/Importer.cpp
@@ -28,6 +28,7 @@
#include <luci/IR/CircleNodes.h>
#include <luci/Profile/CircleNodeID.h>
#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Plan/CircleNodeExecutionPlan.h>
#include <luci/Log.h>
#include <luci/LogHelper.h>
@@ -344,6 +345,25 @@ std::unique_ptr<Module> Importer::importModule(const circle::Model *model) const
module->source_table(table);
}
+ // Add execution_plan annotations
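+ // Each table entry is assumed to hold the node's execution order at index 0,
+ // followed by the offsets of its output tensors; entries are looked up by the
+ // node's position in the post-order traversal below.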
+ if (circle_metadata->execution_plan_table().size() > 0)
+ {
+ auto execution_plan_table = circle_metadata->execution_plan_table();
+ auto node_position = 0;
+ for (auto node : loco::postorder_traversal(loco::output_nodes(module->graph())))
+ {
+ if (auto circle_node = dynamic_cast<luci::CircleNode *>(node))
+ {
+ auto node_plan = execution_plan_table[node_position];
+ luci::add_execution_plan(
+ circle_node,
+ luci::CircleNodeExecutionPlan(
+ node_plan[0], std::vector<uint32_t>(node_plan.begin() + 1, node_plan.end())));
+ }
+ node_position++;
+ }
+ }
+
return module;
}
diff --git a/compiler/luci/lang/CMakeLists.txt b/compiler/luci/lang/CMakeLists.txt
index 669a866b1..433b7cd4e 100644
--- a/compiler/luci/lang/CMakeLists.txt
+++ b/compiler/luci/lang/CMakeLists.txt
@@ -2,7 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
-add_library(luci_lang SHARED ${SOURCES})
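+# LIBRARY_TYPE may be set by the caller (e.g. cmake -DLIBRARY_TYPE=STATIC) to override
+# how this library is built; when unset, the previous SHARED default is kept.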
+if (NOT LIBRARY_TYPE)
+ set(LIBRARY_TYPE "SHARED")
+endif(NOT LIBRARY_TYPE)
+
+add_library(luci_lang ${LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_lang PRIVATE src)
target_include_directories(luci_lang PUBLIC include)
target_link_libraries(luci_lang PUBLIC loco)
diff --git a/compiler/luci/lang/include/luci/IR/ExecutionPlanTable.h b/compiler/luci/lang/include/luci/IR/ExecutionPlanTable.h
new file mode 100644
index 000000000..5c33c1123
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/ExecutionPlanTable.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_EXECUTION_PLAN_TABLE_H__
+#define __LUCI_EXECUTION_PLAN_TABLE_H__
+
+#include <cstdint>
+#include <map>
+#include <vector>
+
+namespace luci
+{
+
+using ExecutionPlanTable = std::map<uint32_t, std::vector<uint32_t>>;
+
+} // namespace luci
+
+#endif // __LUCI_EXECUTION_PLAN_TABLE_H__
diff --git a/compiler/luci/log/CMakeLists.txt b/compiler/luci/log/CMakeLists.txt
index 23bd00828..b64a0651e 100644
--- a/compiler/luci/log/CMakeLists.txt
+++ b/compiler/luci/log/CMakeLists.txt
@@ -1,7 +1,11 @@
# TODO Find how to test logging framework
file(GLOB_RECURSE SOURCES "src/*.cpp")
-add_library(luci_log SHARED ${SOURCES})
+if (NOT LIBRARY_TYPE)
+ set(LIBRARY_TYPE "SHARED")
+endif(NOT LIBRARY_TYPE)
+
+add_library(luci_log ${LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_log PUBLIC include)
target_link_libraries(luci_log PUBLIC hermes)
target_link_libraries(luci_log PRIVATE hermes_std)
diff --git a/compiler/luci/logex/CMakeLists.txt b/compiler/luci/logex/CMakeLists.txt
index cd2571ba1..4d801b046 100644
--- a/compiler/luci/logex/CMakeLists.txt
+++ b/compiler/luci/logex/CMakeLists.txt
@@ -1,7 +1,11 @@
# TODO Find how to test logging-ex utility
file(GLOB_RECURSE SOURCES "src/*.cpp")
-add_library(luci_logex SHARED ${SOURCES})
+if (NOT LIBRARY_TYPE)
+ set(LIBRARY_TYPE "SHARED")
+endif(NOT LIBRARY_TYPE)
+
+add_library(luci_logex ${LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_logex PUBLIC include)
target_link_libraries(luci_logex PUBLIC loco)
target_link_libraries(luci_logex PUBLIC locop)
diff --git a/compiler/luci/partition/CMakeLists.txt b/compiler/luci/partition/CMakeLists.txt
index 236b689c4..eacbe1ccc 100644
--- a/compiler/luci/partition/CMakeLists.txt
+++ b/compiler/luci/partition/CMakeLists.txt
@@ -2,7 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
-add_library(luci_partition SHARED ${SOURCES})
+if (NOT LIBRARY_TYPE)
+ set(LIBRARY_TYPE "SHARED")
+endif(NOT LIBRARY_TYPE)
+
+add_library(luci_partition ${LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_partition PRIVATE src)
target_include_directories(luci_partition PUBLIC include)
target_link_libraries(luci_partition PUBLIC luci_lang)
diff --git a/compiler/luci/pass/CMakeLists.txt b/compiler/luci/pass/CMakeLists.txt
index fd06c6d52..2361bb4f5 100644
--- a/compiler/luci/pass/CMakeLists.txt
+++ b/compiler/luci/pass/CMakeLists.txt
@@ -1,8 +1,18 @@
+nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
+if(NOT FlatBuffers_FOUND)
+ message(STATUS "FlatBuffers NOT FOUND")
+ return()
+endif(NOT FlatBuffers_FOUND)
+
file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
-add_library(luci_pass SHARED ${SOURCES})
+if (NOT LIBRARY_TYPE)
+ set(LIBRARY_TYPE "SHARED")
+endif(NOT LIBRARY_TYPE)
+
+add_library(luci_pass ${LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_pass PRIVATE src)
target_include_directories(luci_pass PUBLIC include)
target_link_libraries(luci_pass PUBLIC loco)
@@ -13,9 +23,11 @@ target_link_libraries(luci_pass PRIVATE luci_log)
target_link_libraries(luci_pass PRIVATE luci_service)
target_link_libraries(luci_pass PRIVATE luci_logex)
target_link_libraries(luci_pass PRIVATE luci_profile)
+target_link_libraries(luci_pass PRIVATE mio_tflite260_inc)
target_link_libraries(luci_pass PRIVATE nncc_common)
target_link_libraries(luci_pass PRIVATE pepper_csv2vec)
target_link_libraries(luci_pass PRIVATE oops)
+target_link_libraries(luci_pass PRIVATE flatbuffers-1.12)
install(TARGETS luci_pass DESTINATION lib)
install(DIRECTORY include/ DESTINATION include
FILES_MATCHING PATTERN "*.h")
@@ -31,4 +43,5 @@ target_include_directories(luci_pass_test PRIVATE src)
target_link_libraries(luci_pass_test luci_pass)
target_link_libraries(luci_pass_test luci_lang)
target_link_libraries(luci_pass_test luci_testhelper)
+target_link_libraries(luci_pass_test flatbuffers-1.12)
#target_link_libraries(luci_pass_test oops)
diff --git a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h
index 3bcc7c5bb..917cacae9 100644
--- a/compiler/luci/pass/include/luci/CircleOptimizer.h
+++ b/compiler/luci/pass/include/luci/CircleOptimizer.h
@@ -34,6 +34,7 @@ public:
{
enum Algorithm
{
+ FuseAddWithFullyConnected,
FuseAddWithTConv,
FuseBatchNormWithConv,
FuseBatchNormWithDwConv,
@@ -51,8 +52,10 @@ public:
Requantize,
FoldAddV2,
FoldCast,
+ FoldDepthwiseConv2D,
FoldDequantize,
FoldSparseToDense,
+ ForceQuantParam,
ForwardReshapeToUnaryOp,
SparsifyTensorPass,
FusePreActivationBatchNorm,
@@ -64,7 +67,9 @@ public:
ReplaceSubWithAdd,
SubstitutePackToReshape,
SubstitutePadV2ToPad,
+ SubstituteSplitVToSplit,
SubstituteSqueezeToReshape,
+ ExpandBroadcastConst,
ConvertNCHWToNHWC,
RemoveUnnecessarySlice,
RemoveUnnecessaryStridedSlice,
@@ -82,9 +87,12 @@ public:
enum AlgorithmParameters
{
// quantize
- Quantize_input_dtype,
- Quantize_output_dtype,
+ Quantize_input_model_dtype,
+ Quantize_output_model_dtype,
Quantize_granularity, // layer-wise or channel-wise
+ Quantize_tensor_names,
+ Quantize_scales,
+ Quantize_zero_points,
// sparsify
Sparsify_tensor_name,
@@ -96,6 +104,9 @@ public:
// convert NCHW to NHWC
NCHW_to_NHWC_input_shape,
NCHW_to_NHWC_output_shape,
+
+ Quantize_input_dtype = Quantize_input_model_dtype, // TODO Remove this
+ Quantize_output_dtype = Quantize_output_model_dtype, // TODO Remove this
};
virtual ~Options() = default;
@@ -104,6 +115,8 @@ public:
virtual bool query(Algorithm) = 0;
virtual void param(AlgorithmParameters, const std::string &) = 0;
virtual const std::string param(AlgorithmParameters) const = 0;
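+ // params() is the list-valued counterpart of param(); it is used by options that
+ // take multiple values, e.g. ForceQuantParam's tensor names, scales and zero points.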
+ virtual void params(AlgorithmParameters, std::vector<std::string> &) = 0;
+ virtual std::vector<std::string> params(AlgorithmParameters) const = 0;
};
public:
diff --git a/compiler/luci/pass/include/luci/Pass/ExpandBroadcastConstPass.h b/compiler/luci/pass/include/luci/Pass/ExpandBroadcastConstPass.h
new file mode 100644
index 000000000..5ee26b472
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/ExpandBroadcastConstPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_EXPAND_BROADCAST_CONST_PASS_H__
+#define __LUCI_EXPAND_BROADCAST_CONST_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to remove broadcasts of Const nodes.
+ */
+struct ExpandBroadcastConstPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::ExpandBroadcastConstPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_EXPAND_BROADCAST_CONST_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FoldDepthwiseConv2DPass.h b/compiler/luci/pass/include/luci/Pass/FoldDepthwiseConv2DPass.h
new file mode 100644
index 000000000..58e5b71a7
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FoldDepthwiseConv2DPass.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_DEPTHWISE_CONV_2D_PASS_H__
+#define __LUCI_FOLD_DEPTHWISE_CONV_2D_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fold DepthwiseConv2D with constant input and filter into a
+ * constant tensor
+ */
+struct FoldDepthwiseConv2DPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FoldDepthwiseConv2DPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_DEPTHWISE_CONV_2D_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/ForceQuantParamPass.h b/compiler/luci/pass/include/luci/Pass/ForceQuantParamPass.h
new file mode 100644
index 000000000..752ce1d31
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/ForceQuantParamPass.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FORCE_QUANT_PARAM_PASS_H__
+#define __LUCI_FORCE_QUANT_PARAM_PASS_H__
+
+#include <loco.h>
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Pass to write quantparam (scale, zerop) to the specified tensors
+ */
+class ForceQuantParamPass : public logo::Pass
+{
+public:
+ using TensorVector = std::vector<std::string>;
+ using ScaleVector = std::vector<float>;
+ using ZPVector = std::vector<int64_t>;
+
+public:
+ ForceQuantParamPass(TensorVector &tensors, ScaleVector &scales, ZPVector &zerops)
+ : _tensors{tensors}, _scales{scales}, _zerops{zerops}
+ {
+ // DO NOTHING
+ }
+ virtual const char *name(void) const { return "luci::ForceQuantParamPass"; }
+
+public:
+ bool run(loco::Graph *graph);
+
+private:
+ TensorVector _tensors;
+ ScaleVector _scales;
+ ZPVector _zerops;
+};
+
+} // namespace luci
+
+#endif //__LUCI_FORCE_QUANT_PARAM_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FuseAddWithFullyConnectedPass.h b/compiler/luci/pass/include/luci/Pass/FuseAddWithFullyConnectedPass.h
new file mode 100644
index 000000000..a59b644e9
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FuseAddWithFullyConnectedPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_ADD_WITH_FULLY_CONNECTED_PASS_H__
+#define __LUCI_FUSE_ADD_WITH_FULLY_CONNECTED_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to fuse Add into FullyConnected
+ */
+struct FuseAddWithFullyConnectedPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::FuseAddWithFullyConnectedPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_ADD_WITH_FULLY_CONNECTED_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h b/compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h
index 78e7323f9..68765ec5b 100644
--- a/compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h
+++ b/compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h
@@ -32,9 +32,10 @@ namespace luci
class QuantizeDequantizeWeightsPass : public logo::Pass
{
public:
- QuantizeDequantizeWeightsPass(loco::DataType input_dtype, loco::DataType output_dtype,
+ QuantizeDequantizeWeightsPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype,
QuantizationGranularity granularity)
- : _input_dtype{input_dtype}, _output_dtype{output_dtype}, _granularity{granularity}
+ : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype}, _granularity{
+ granularity}
{
// DO NOTHING
}
@@ -44,8 +45,8 @@ public:
bool run(loco::Graph *graph);
private:
- loco::DataType _input_dtype;
- loco::DataType _output_dtype;
+ loco::DataType _input_model_dtype;
+ loco::DataType _output_model_dtype;
QuantizationGranularity _granularity;
};
diff --git a/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h b/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h
index 9520910d5..d618a07b6 100644
--- a/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h
+++ b/compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h
@@ -32,9 +32,10 @@ namespace luci
class QuantizeWithMinMaxPass : public logo::Pass
{
public:
- QuantizeWithMinMaxPass(loco::DataType input_dtype, loco::DataType output_dtype,
+ QuantizeWithMinMaxPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype,
QuantizationGranularity granularity)
- : _input_dtype{input_dtype}, _output_dtype{output_dtype}, _granularity{granularity}
+ : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype}, _granularity{
+ granularity}
{
// DO NOTHING
}
@@ -44,8 +45,8 @@ public:
bool run(loco::Graph *graph);
private:
- loco::DataType _input_dtype;
- loco::DataType _output_dtype;
+ loco::DataType _input_model_dtype;
+ loco::DataType _output_model_dtype;
QuantizationGranularity _granularity;
};
diff --git a/compiler/luci/pass/include/luci/Pass/SubstituteSplitVToSplitPass.h b/compiler/luci/pass/include/luci/Pass/SubstituteSplitVToSplitPass.h
new file mode 100644
index 000000000..8c8900159
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/SubstituteSplitVToSplitPass.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SUBSTITUTE_SPLIT_V_TO_SPLIT_PASS_H__
+#define __LUCI_SUBSTITUTE_SPLIT_V_TO_SPLIT_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Class to substitute certain SplitV to Split.
+ */
+struct SubstituteSplitVToSplitPass final : public logo::Pass
+{
+ const char *name(void) const final { return "luci::SubstituteSplitVToSplitPass"; }
+
+ bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_SUBSTITUTE_SPLIT_V_TO_SPLIT_PASS_H__
diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp
index 98c22a07a..5d0c92625 100644
--- a/compiler/luci/pass/src/CircleOptimizer.cpp
+++ b/compiler/luci/pass/src/CircleOptimizer.cpp
@@ -17,12 +17,16 @@
#include "luci/CircleOptimizer.h"
#include "luci/Pass/ConvertNCHWToNHWCPass.h"
+#include "luci/Pass/ExpandBroadcastConstPass.h"
#include "luci/Pass/FoldAddV2Pass.h"
#include "luci/Pass/FoldCastPass.h"
+#include "luci/Pass/FoldDepthwiseConv2DPass.h"
#include "luci/Pass/FoldDequantizePass.h"
#include "luci/Pass/FoldSparseToDensePass.h"
#include "luci/Pass/ForwardReshapeToUnaryOpPass.h"
+#include "luci/Pass/ForceQuantParamPass.h"
#include "luci/Pass/FuseActivationFunctionPass.h"
+#include "luci/Pass/FuseAddWithFullyConnectedPass.h"
#include "luci/Pass/FuseAddWithTConvPass.h"
#include "luci/Pass/FuseBatchNormWithConvPass.h"
#include "luci/Pass/FuseBatchNormWithDwConvPass.h"
@@ -55,6 +59,7 @@
#include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h"
#include "luci/Pass/SubstitutePackToReshapePass.h"
#include "luci/Pass/SubstitutePadV2ToPadPass.h"
+#include "luci/Pass/SubstituteSplitVToSplitPass.h"
#include "luci/Pass/SubstituteSqueezeToReshapePass.h"
#include "luci/Pass/SubstituteStridedSliceToReshapePass.h"
#include "luci/Pass/SubstituteTransposeToReshapePass.h"
@@ -86,17 +91,37 @@ namespace
using namespace luci;
+template <typename T> T lexical_cast(const std::string &str)
+{
+ std::istringstream ss;
+ ss.str(str);
+ T data;
+ ss >> data;
+ return data;
+}
+
+template <typename T> std::vector<T> lexical_cast(std::vector<std::string> &sv)
+{
+ std::vector<T> result;
+ std::transform(sv.begin(), sv.end(), std::back_inserter(result),
+ [](std::string str) -> T { return lexical_cast<T>(str); });
+ return result;
+}
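+// For example, lexical_cast<float> applied to {"0.5", "2"} yields {0.5f, 2.0f}; the
+// conversion only relies on operator>> being defined for the target type T.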
+
class OptimizeOptionsImpl final : public luci::CircleOptimizer::Options
{
public:
void enable(Algorithm) final;
void param(AlgorithmParameters, const std::string &) final;
const std::string param(AlgorithmParameters) const final;
+ void params(AlgorithmParameters, std::vector<std::string> &) final;
+ std::vector<std::string> params(AlgorithmParameters) const final;
bool query(Algorithm) final;
private:
std::vector<Algorithm> _algorithms;
std::map<AlgorithmParameters, const std::string> _algorithm_params;
+ std::map<AlgorithmParameters, std::vector<std::string>> _multiple_params;
};
void OptimizeOptionsImpl::enable(Algorithm algo) { _algorithms.push_back(algo); }
@@ -119,6 +144,24 @@ const std::string OptimizeOptionsImpl::param(AlgorithmParameters param) const
}
}
+void OptimizeOptionsImpl::params(AlgorithmParameters param, std::vector<std::string> &vec)
+{
+ _multiple_params[param] = vec;
+}
+
+std::vector<std::string> OptimizeOptionsImpl::params(AlgorithmParameters param) const
+{
+ auto param_vec = _multiple_params.find(param);
+ if (param_vec != _multiple_params.end())
+ {
+ return param_vec->second;
+ }
+ else
+ {
+ return std::vector<std::string>();
+ }
+}
+
bool OptimizeOptionsImpl::query(Algorithm algo)
{
std::vector<Algorithm>::iterator it = std::find(_algorithms.begin(), _algorithms.end(), algo);
@@ -237,6 +280,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
{
phase.emplace_back(std::make_unique<FuseBatchNormWithTConvPass>());
}
+ if (_options->query(Options::Algorithm::FuseAddWithFullyConnected))
+ {
+ phase.emplace_back(std::make_unique<FuseAddWithFullyConnectedPass>());
+ }
if (_options->query(Options::Algorithm::FuseAddWithTConv))
{
phase.emplace_back(std::make_unique<FuseAddWithTConvPass>());
@@ -257,6 +304,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
{
phase.emplace_back(std::make_unique<luci::FoldCastPass>());
}
+ if (_options->query(Options::Algorithm::FoldDepthwiseConv2D))
+ {
+ phase.emplace_back(std::make_unique<luci::FoldDepthwiseConv2DPass>());
+ }
if (_options->query(Options::Algorithm::FoldDequantize))
{
phase.emplace_back(std::make_unique<luci::FoldDequantizePass>());
@@ -281,6 +332,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
{
phase.emplace_back(std::make_unique<luci::ShuffleWeightTo16x1Float32Pass>());
}
+ if (_options->query(Options::Algorithm::ExpandBroadcastConst))
+ {
+ phase.emplace_back(std::make_unique<luci::ExpandBroadcastConstPass>());
+ }
if (_options->query(Options::Algorithm::RemoveFakeQuant))
{
phase.emplace_back(std::make_unique<luci::RemoveFakeQuantPass>());
@@ -329,6 +384,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
{
phase.emplace_back(std::make_unique<luci::SubstitutePadV2ToPadPass>());
}
+ if (_options->query(Options::Algorithm::SubstituteSplitVToSplit))
+ {
+ phase.emplace_back(std::make_unique<luci::SubstituteSplitVToSplitPass>());
+ }
if (_options->query(Options::Algorithm::SubstituteSqueezeToReshape))
{
phase.emplace_back(std::make_unique<luci::SubstituteSqueezeToReshapePass>());
@@ -363,28 +422,30 @@ void CircleOptimizer::quantize(loco::Graph *g) const
// Fake quantization of weights
if (_options->query(Options::Algorithm::QuantizeDequantizeWeights))
{
- static const std::vector<std::string> fakeq_supported_input_dtype{"float32"};
- static const std::vector<std::string> fakeq_supported_output_dtype{"uint8", "int16"};
+ static const std::vector<std::string> fakeq_supported_input_model_dtype{"float32"};
+ static const std::vector<std::string> fakeq_supported_output_model_dtype{"uint8", "int16"};
static const std::vector<std::string> fakeq_supported_granularity{"layer", "channel"};
- auto input_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_dtype);
- auto output_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_dtype);
+ auto input_model_dtype =
+ _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
+ auto output_model_dtype =
+ _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
- if (!in_array(to_lower_case(input_dtype), fakeq_supported_input_dtype))
+ if (!in_array(to_lower_case(input_model_dtype), fakeq_supported_input_model_dtype))
throw std::runtime_error("Unsupported input type. List of supported input type: " +
- to_string(fakeq_supported_input_dtype));
+ to_string(fakeq_supported_input_model_dtype));
- if (!in_array(to_lower_case(output_dtype), fakeq_supported_output_dtype))
+ if (!in_array(to_lower_case(output_model_dtype), fakeq_supported_output_model_dtype))
throw std::runtime_error("Unsupported output type. List of supported output type: " +
- to_string(fakeq_supported_output_dtype));
+ to_string(fakeq_supported_output_model_dtype));
if (!in_array(to_lower_case(granularity), fakeq_supported_granularity))
throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
to_string(fakeq_supported_granularity));
if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise &&
- str_to_dtype(output_dtype) != loco::DataType::U8)
+ str_to_dtype(output_model_dtype) != loco::DataType::U8)
throw std::runtime_error("Layer-wise quantization only supports uint8 dtype.");
// Clear existing quantparams before doing fake quantization
@@ -395,39 +456,43 @@ void CircleOptimizer::quantize(loco::Graph *g) const
circle_node->quantparam(nullptr);
}
- luci::QuantizeDequantizeWeightsPass fake_quantizer(
- str_to_dtype(input_dtype), str_to_dtype(output_dtype), str_to_granularity(granularity));
+ luci::QuantizeDequantizeWeightsPass fake_quantizer(str_to_dtype(input_model_dtype),
+ str_to_dtype(output_model_dtype),
+ str_to_granularity(granularity));
fake_quantizer.run(g);
}
// Actual quantization of weights, bias, and activation
if (_options->query(Options::Algorithm::QuantizeWithMinMax))
{
- static const std::vector<std::string> qwmm_supported_input_dtype{"float32"};
- static const std::vector<std::string> qwmm_supported_output_dtype{"uint8", "int16"};
+ static const std::vector<std::string> qwmm_supported_input_model_dtype{"float32"};
+ static const std::vector<std::string> qwmm_supported_output_model_dtype{"uint8", "int16"};
static const std::vector<std::string> qwmm_supported_granularity{"layer", "channel"};
- auto input_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_dtype);
- auto output_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_dtype);
+ auto input_model_dtype =
+ _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
+ auto output_model_dtype =
+ _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
- if (!in_array(to_lower_case(input_dtype), qwmm_supported_input_dtype))
+ if (!in_array(to_lower_case(input_model_dtype), qwmm_supported_input_model_dtype))
throw std::runtime_error("Unsupported input type. List of supported input types: " +
- to_string(qwmm_supported_input_dtype));
+ to_string(qwmm_supported_input_model_dtype));
- if (!in_array(to_lower_case(output_dtype), qwmm_supported_output_dtype))
+ if (!in_array(to_lower_case(output_model_dtype), qwmm_supported_output_model_dtype))
throw std::runtime_error("Unsupported output type. List of supported output types: " +
- to_string(qwmm_supported_output_dtype));
+ to_string(qwmm_supported_output_model_dtype));
if (!in_array(to_lower_case(granularity), qwmm_supported_granularity))
throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
to_string(qwmm_supported_granularity));
if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise &&
- str_to_dtype(output_dtype) != loco::DataType::U8)
+ str_to_dtype(output_model_dtype) != loco::DataType::U8)
throw std::runtime_error("Layer-wise quantization only supports uint8 dtype.");
- luci::QuantizeWithMinMaxPass quantizer(str_to_dtype(input_dtype), str_to_dtype(output_dtype),
+ luci::QuantizeWithMinMaxPass quantizer(str_to_dtype(input_model_dtype),
+ str_to_dtype(output_model_dtype),
str_to_granularity(granularity));
quantizer.run(g);
@@ -446,7 +511,7 @@ void CircleOptimizer::quantize(loco::Graph *g) const
phase_runner.run(phase);
// Verify the type/granularity of the quantized model
- luci::QuantizedModelVerifier verifier(str_to_dtype(output_dtype),
+ luci::QuantizedModelVerifier verifier(str_to_dtype(output_model_dtype),
str_to_granularity(granularity));
verifier.verify(g);
}
@@ -454,24 +519,44 @@ void CircleOptimizer::quantize(loco::Graph *g) const
// Requantize
if (_options->query(Options::Algorithm::Requantize))
{
- static const std::vector<std::string> rq_supported_input_dtype{"int8"};
- static const std::vector<std::string> rq_supported_output_dtype{"uint8"};
+ static const std::vector<std::string> rq_supported_input_model_dtype{"int8"};
+ static const std::vector<std::string> rq_supported_output_model_dtype{"uint8"};
- auto input_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_dtype);
- auto output_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_dtype);
+ auto input_model_dtype =
+ _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
+ auto output_model_dtype =
+ _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
- if (!in_array(to_lower_case(input_dtype), rq_supported_input_dtype))
+ if (!in_array(to_lower_case(input_model_dtype), rq_supported_input_model_dtype))
throw std::runtime_error("Unsupported input type. List of supported input types: " +
- to_string(rq_supported_input_dtype));
+ to_string(rq_supported_input_model_dtype));
- if (!in_array(to_lower_case(output_dtype), rq_supported_output_dtype))
+ if (!in_array(to_lower_case(output_model_dtype), rq_supported_output_model_dtype))
throw std::runtime_error("Unsupported output type. List of supported output types: " +
- to_string(rq_supported_output_dtype));
+ to_string(rq_supported_output_model_dtype));
- luci::RequantizePass requantizer(str_to_dtype(input_dtype), str_to_dtype(output_dtype));
+ luci::RequantizePass requantizer(str_to_dtype(input_model_dtype),
+ str_to_dtype(output_model_dtype));
requantizer.run(g);
}
+ // Force to write quantparam to specified tensors
+ // NOTE Only per-tensor (not per-channel) qparam can be written
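+ // The three lists read below are assumed to be parallel: the i-th scale and the
+ // i-th zero point are applied to the tensor named by the i-th entry of
+ // Quantize_tensor_names.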
+ if (_options->query(Options::Algorithm::ForceQuantParam))
+ {
+ ForceQuantParamPass::TensorVector tensors =
+ _options->params(Options::AlgorithmParameters::Quantize_tensor_names);
+ auto str_scales = _options->params(Options::AlgorithmParameters::Quantize_scales);
+ auto str_zero_points = _options->params(Options::AlgorithmParameters::Quantize_zero_points);
+
+ // Cast scales/zero_points to proper types
+ ForceQuantParamPass::ScaleVector scales = lexical_cast<float>(str_scales);
+ ForceQuantParamPass::ZPVector zero_points = lexical_cast<int64_t>(str_zero_points);
+
+ ForceQuantParamPass fq(tensors, scales, zero_points);
+ fq.run(g);
+ }
+
logo::Phase phase;
// Do Shape/Type inference
diff --git a/compiler/luci/pass/src/CircleOptimizer.test.cpp b/compiler/luci/pass/src/CircleOptimizer.test.cpp
index 43d96feaf..a1b5c7f80 100644
--- a/compiler/luci/pass/src/CircleOptimizer.test.cpp
+++ b/compiler/luci/pass/src/CircleOptimizer.test.cpp
@@ -33,6 +33,7 @@ TEST(CircleOptimizerTest, optimize_algorithms)
// TODO add more if needed
options->enable(Algorithms::FoldAddV2);
options->enable(Algorithms::FoldCast);
+ options->enable(Algorithms::FoldDepthwiseConv2D);
options->enable(Algorithms::FoldDequantize);
options->enable(Algorithms::FoldSparseToDense);
options->enable(Algorithms::FusePreActivationBatchNorm);
@@ -45,6 +46,7 @@ TEST(CircleOptimizerTest, optimize_algorithms)
options->enable(Algorithms::SubstituteStridedSliceToReshape);
options->enable(Algorithms::SubstituteTransposeToReshape);
options->enable(Algorithms::ConvertNCHWToNHWC);
+ options->enable(Algorithms::ExpandBroadcastConst);
o.optimize(&g);
@@ -78,8 +80,8 @@ TEST(CircleOptimizerTest, quantize_quantdequant_simple)
auto options = o.options();
options->enable(Algorithms::QuantizeDequantizeWeights);
- options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
- options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
options->param(AlgorithmParameters::Quantize_granularity, "layer");
o.quantize(&g);
@@ -95,8 +97,8 @@ TEST(CircleOptimizerTest, quantize_quantdequant_input_NEG)
auto options = o.options();
options->enable(Algorithms::QuantizeDequantizeWeights);
- options->param(AlgorithmParameters::Quantize_input_dtype, "invalid");
- options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
options->param(AlgorithmParameters::Quantize_granularity, "layer");
EXPECT_THROW(o.quantize(&g), std::runtime_error);
@@ -110,8 +112,8 @@ TEST(CircleOptimizerTest, quantize_quantdequant_output_NEG)
auto options = o.options();
options->enable(Algorithms::QuantizeDequantizeWeights);
- options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
- options->param(AlgorithmParameters::Quantize_output_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid");
options->param(AlgorithmParameters::Quantize_granularity, "layer");
EXPECT_THROW(o.quantize(&g), std::runtime_error);
@@ -125,8 +127,8 @@ TEST(CircleOptimizerTest, quantize_quantdequant_gran_NEG)
auto options = o.options();
options->enable(Algorithms::QuantizeDequantizeWeights);
- options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
- options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
options->param(AlgorithmParameters::Quantize_granularity, "invalid");
EXPECT_THROW(o.quantize(&g), std::runtime_error);
@@ -140,8 +142,8 @@ TEST(CircleOptimizerTest, quantize_minmax_simple)
auto options = o.options();
options->enable(Algorithms::QuantizeWithMinMax);
- options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
- options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
options->param(AlgorithmParameters::Quantize_granularity, "layer");
o.quantize(&g);
@@ -157,8 +159,8 @@ TEST(CircleOptimizerTest, quantize_minmax_input_NEG)
auto options = o.options();
options->enable(Algorithms::QuantizeWithMinMax);
- options->param(AlgorithmParameters::Quantize_input_dtype, "invalid");
- options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
options->param(AlgorithmParameters::Quantize_granularity, "layer");
EXPECT_THROW(o.quantize(&g), std::runtime_error);
@@ -172,8 +174,8 @@ TEST(CircleOptimizerTest, quantize_minmax_output_NEG)
auto options = o.options();
options->enable(Algorithms::QuantizeWithMinMax);
- options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
- options->param(AlgorithmParameters::Quantize_output_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid");
options->param(AlgorithmParameters::Quantize_granularity, "layer");
EXPECT_THROW(o.quantize(&g), std::runtime_error);
@@ -187,8 +189,8 @@ TEST(CircleOptimizerTest, quantize_minmax_gran_NEG)
auto options = o.options();
options->enable(Algorithms::QuantizeWithMinMax);
- options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
- options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
options->param(AlgorithmParameters::Quantize_granularity, "invalid");
EXPECT_THROW(o.quantize(&g), std::runtime_error);
@@ -202,8 +204,8 @@ TEST(CircleOptimizerTest, quantize_requant_simple)
auto options = o.options();
options->enable(Algorithms::Requantize);
- options->param(AlgorithmParameters::Quantize_input_dtype, "int8");
- options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "int8");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
o.quantize(&g);
@@ -218,8 +220,8 @@ TEST(CircleOptimizerTest, quantize_requant_input_NEG)
auto options = o.options();
options->enable(Algorithms::Requantize);
- options->param(AlgorithmParameters::Quantize_input_dtype, "invalid");
- options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
EXPECT_THROW(o.quantize(&g), std::runtime_error);
}
@@ -232,8 +234,8 @@ TEST(CircleOptimizerTest, quantize_requant_output_NEG)
auto options = o.options();
options->enable(Algorithms::Requantize);
- options->param(AlgorithmParameters::Quantize_input_dtype, "int8");
- options->param(AlgorithmParameters::Quantize_output_dtype, "invalid");
+ options->param(AlgorithmParameters::Quantize_input_model_dtype, "int8");
+ options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid");
EXPECT_THROW(o.quantize(&g), std::runtime_error);
}
diff --git a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp
index 95e23e1b8..270714049 100644
--- a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp
+++ b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp
@@ -28,6 +28,22 @@
namespace
{
+bool is_same_shape(const luci::CircleNode *node, const std::vector<loco::Dimension> &shape)
+{
+ if (not node)
+ return false;
+
+ if (shape.size() != node->rank())
+ return false;
+
+ for (uint32_t i = 0; i < shape.size(); i++)
+ {
+ if (not(node->dim(i) == shape[i]))
+ return false;
+ }
+ return true;
+}
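+// e.g. is_same_shape(multiplier, {1, node->dim(1), 1, 1}) below checks that the
+// multiplier is a (1, C, 1, 1) tensor whose C matches the node's channel dimension.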
+
enum class DataFormat
{
NCHW,
@@ -465,7 +481,7 @@ bool is_NCHW_with_s_const(const T *node, luci::CircleNode *&pred_node,
//
// Find MUL with an NCHW pattern described below
// - Input (non-constant) shape : [N, C, H, W]
-// - Input (constant) shape : [1, C, 1, 1] or a scalar (1)
+// - Input (constant) shape : [1, C, 1, 1], [N, C, H, W] or a scalar (1)
// - Output shape : [N, C, H, W]
bool is_NCHW_with_const(const luci::CircleMul *node, luci::CircleNode *&pred_node,
luci::CircleConst *&multiplier)
@@ -497,26 +513,22 @@ bool is_NCHW_with_const(const luci::CircleMul *node, luci::CircleNode *&pred_nod
if (const_rank != 4 && const_rank != 0 && const_rank != 1)
return false;
- if (const_rank == 4)
- {
- for (uint32_t i = 0; i < const_rank; i++)
- {
- if (i != 1 && multiplier->dim(i).value() != 1)
- return false;
- }
- }
-
const auto input_cdim = pred_node->dim(1);
const auto output_cdim = node->dim(1);
if (const_rank == 4)
{
- const auto const_cdim = multiplier->dim(1);
- // Check Input, Output, Const have the same channel size
- if (const_cdim == input_cdim && input_cdim == output_cdim)
- return true;
- else
- return false;
+ bool supported_shape = false;
+
+ // Check multiplier is (1, C, 1, 1)
+ if (is_same_shape(multiplier, {1, node->dim(1), 1, 1}))
+ supported_shape = true;
+
+ // Check multiplier is (N, C, H, W)
+ if (is_same_shape(multiplier, {node->dim(0), node->dim(1), node->dim(2), node->dim(3)}))
+ supported_shape = true;
+
+ return supported_shape;
}
if (input_cdim == output_cdim)
return true;
@@ -527,7 +539,7 @@ bool is_NCHW_with_const(const luci::CircleMul *node, luci::CircleNode *&pred_nod
// We assume ADD with const input is NCHW if,
// Input shape: (N, C, H, W)
// Output shape: (N, C, H, W)
-// 1. Const shape is (1, C, 1, 1) or a scalar (1)
+// 1. Const shape is (1, C, 1, 1), (N, C, H, W) or a scalar (1)
// 2. Input, Output, Const have the same C.
bool is_NCHW_with_const(const luci::CircleAdd *node, luci::CircleNode *&pred_node,
luci::CircleConst *&beta)
@@ -559,30 +571,22 @@ bool is_NCHW_with_const(const luci::CircleAdd *node, luci::CircleNode *&pred_nod
if (const_rank != 4 && const_rank != 0 && const_rank != 1)
return false;
- if (const_rank == 4)
- {
- // Check the shape is (1, C, 1, 1)
- for (uint32_t i = 0; i < const_rank; i++)
- {
- if (i == 1)
- continue;
-
- if (beta->dim(i).value() != 1)
- return false;
- }
- }
-
const auto input_cdim = pred_node->dim(1);
const auto output_cdim = node->dim(1);
if (const_rank == 4)
{
- const auto const_cdim = beta->dim(1);
- // Check Input, Output, Const have the same channel size
- if (const_cdim == input_cdim && input_cdim == output_cdim)
- return true;
- else
- return false;
+ bool supported_shape = false;
+
+ // Check beta is (1, C, 1, 1)
+ if (is_same_shape(beta, {1, node->dim(1), 1, 1}))
+ supported_shape = true;
+
+ // Check beta is (N, C, H, W)
+ if (is_same_shape(beta, {node->dim(0), node->dim(1), node->dim(2), node->dim(3)}))
+ supported_shape = true;
+
+ return supported_shape;
}
if (input_cdim == output_cdim)
return true;
@@ -593,7 +597,7 @@ bool is_NCHW_with_const(const luci::CircleAdd *node, luci::CircleNode *&pred_nod
// We assume SUB with const input is NCHW if,
// Input shape: (N, C, H, W)
// Output shape: (N, C, H, W)
-// 1. Const shape is (1, C, 1, 1) or a scalar (1)
+// 1. Const shape is (1, C, 1, 1), (N, C, H, W) or a scalar (1)
// 2. Input, Output, Const have the same C.
bool is_NCHW_with_const(const luci::CircleSub *node, const luci::CircleNode *pred_node,
const luci::CircleConst *subtract)
@@ -609,30 +613,22 @@ bool is_NCHW_with_const(const luci::CircleSub *node, const luci::CircleNode *pre
if (const_rank != 4 && const_rank != 0 && const_rank != 1)
return false;
- if (const_rank == 4)
- {
- // Check the shape is (1, C, 1, 1)
- for (uint32_t i = 0; i < const_rank; i++)
- {
- if (i == 1)
- continue;
-
- if (subtract->dim(i).value() != 1)
- return false;
- }
- }
-
const auto input_cdim = pred_node->dim(1);
const auto output_cdim = node->dim(1);
if (const_rank == 4)
{
- const auto const_cdim = subtract->dim(1);
- // Check Input, Output, Const have the same channel size
- if (const_cdim == input_cdim && input_cdim == output_cdim)
- return true;
- else
- return false;
+ bool supported_shape = false;
+
+ // Check subtract is (1, C, 1, 1)
+ if (is_same_shape(subtract, {1, node->dim(1), 1, 1}))
+ supported_shape = true;
+
+ // Check subtract is (N, C, H, W)
+ if (is_same_shape(subtract, {node->dim(0), node->dim(1), node->dim(2), node->dim(3)}))
+ supported_shape = true;
+
+ return supported_shape;
}
if (input_cdim == output_cdim)
return true;
diff --git a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp
index d844246f8..c9412fbb1 100644
--- a/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp
+++ b/compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp
@@ -130,6 +130,19 @@ protected:
}
public:
+ void update_const_shape_to_nchw(void)
+ {
+ uint32_t channel_size = 16;
+ beta->shape({1, channel_size, 4, 4});
+
+ beta->size<loco::DataType::FLOAT32>(channel_size * 4 * 4);
+ for (uint32_t i = 0; i < channel_size; i++)
+ {
+ beta->at<loco::DataType::FLOAT32>(i) = i;
+ }
+ }
+
+public:
luci::CircleAdd *add = nullptr;
luci::CircleConst *beta = nullptr;
};
@@ -421,6 +434,19 @@ protected:
}
public:
+ void update_const_shape_to_nchw(void)
+ {
+ uint32_t channel_size = 16;
+ multiplier->shape({1, channel_size, 4, 4});
+
+ multiplier->size<loco::DataType::FLOAT32>(channel_size * 4 * 4);
+ for (uint32_t i = 0; i < channel_size; i++)
+ {
+ multiplier->at<loco::DataType::FLOAT32>(i) = i;
+ }
+ }
+
+public:
luci::CircleMul *mul = nullptr;
luci::CircleConst *multiplier = nullptr;
};
@@ -696,6 +722,19 @@ protected:
}
public:
+ void update_const_shape_to_nchw(void)
+ {
+ uint32_t channel_size = 16;
+ beta->shape({1, channel_size, 4, 4});
+
+ beta->size<loco::DataType::FLOAT32>(channel_size * 4 * 4);
+ for (uint32_t i = 0; i < channel_size; i++)
+ {
+ beta->at<loco::DataType::FLOAT32>(i) = i;
+ }
+ }
+
+public:
luci::CircleSub *sub = nullptr;
luci::CircleConst *beta = nullptr;
};
@@ -815,6 +854,30 @@ TEST(ConvertNCHWToNHWC, Add)
check_pre_trans(g.output->from());
}
+TEST(ConvertNCHWToNHWC, Add_NCHW_const)
+{
+ AddGraph g;
+ g.init();
+ g.update_const_shape_to_nchw();
+
+ run_phase(&g.g, false, false);
+
+ check_pre_trans(g.add->x());
+
+ auto add_succs = loco::succs(g.add);
+ EXPECT_EQ(1, add_succs.size());
+ check_post_trans(*add_succs.begin());
+
+ uint32_t channel_size = 16;
+ auto new_beta = dynamic_cast<luci::CircleConst *>(g.add->y());
+ EXPECT_NE(nullptr, new_beta);
+ EXPECT_EQ(4, new_beta->rank());
+ EXPECT_EQ(1, new_beta->dim(0).value());
+ EXPECT_EQ(4, new_beta->dim(1).value());
+ EXPECT_EQ(4, new_beta->dim(2).value());
+ EXPECT_EQ(channel_size, new_beta->dim(3).value());
+}
+
TEST(ConvertNCHWToNHWC, NHWC_Relu)
{
// Relu is already NHWC, so it should not be converted
@@ -1123,6 +1186,30 @@ TEST(ConvertNCHWToNHWC, Mul)
check_pre_trans(g.output->from());
}
+TEST(ConvertNCHWToNHWC, Mul_NCHW_const)
+{
+ MulGraph g;
+ g.init();
+ g.update_const_shape_to_nchw();
+
+ run_phase(&g.g, false, false);
+
+ check_pre_trans(g.mul->x());
+
+ auto mul_succs = loco::succs(g.mul);
+ EXPECT_EQ(1, mul_succs.size());
+ check_post_trans(*mul_succs.begin());
+
+ uint32_t channel_size = 16;
+ auto new_multiplier = dynamic_cast<luci::CircleConst *>(g.mul->y());
+ EXPECT_NE(nullptr, new_multiplier);
+ EXPECT_EQ(4, new_multiplier->rank());
+ EXPECT_EQ(1, new_multiplier->dim(0).value());
+ EXPECT_EQ(4, new_multiplier->dim(1).value());
+ EXPECT_EQ(4, new_multiplier->dim(2).value());
+ EXPECT_EQ(channel_size, new_multiplier->dim(3).value());
+}
+
TEST(ConvertNCHWToNHWC, MulScalar)
{
MulScalarGraph g;
@@ -1432,6 +1519,30 @@ TEST(ConvertNCHWToNHWC, Sub)
check_pre_trans(g.output->from());
}
+TEST(ConvertNCHWToNHWC, Sub_NCHW_const)
+{
+ SubGraph g;
+ g.init();
+ g.update_const_shape_to_nchw();
+
+ run_phase(&g.g, false, false);
+
+ check_pre_trans(g.sub->x());
+
+ auto sub_succs = loco::succs(g.sub);
+ EXPECT_EQ(1, sub_succs.size());
+ check_post_trans(*sub_succs.begin());
+
+ uint32_t channel_size = 16;
+ auto new_beta = dynamic_cast<luci::CircleConst *>(g.sub->y());
+ EXPECT_NE(nullptr, new_beta);
+ EXPECT_EQ(4, new_beta->rank());
+ EXPECT_EQ(1, new_beta->dim(0).value());
+ EXPECT_EQ(4, new_beta->dim(1).value());
+ EXPECT_EQ(4, new_beta->dim(2).value());
+ EXPECT_EQ(channel_size, new_beta->dim(3).value());
+}
+
TEST(ConvertNCHWToNHWC, SubScalar)
{
SubScalarGraph g;
diff --git a/compiler/luci/pass/src/ExpandBroadcastConstPass.cpp b/compiler/luci/pass/src/ExpandBroadcastConstPass.cpp
new file mode 100644
index 000000000..25fb9f171
--- /dev/null
+++ b/compiler/luci/pass/src/ExpandBroadcastConstPass.cpp
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ExpandBroadcastConstPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Log.h>
+
+#include <type_traits>
+
+namespace
+{
+
+luci::CircleConst *create_expanded_constant(luci::CircleConst *node, luci::CircleNode *successor)
+{
+ LOGGER(l);
+
+ if (successor->rank() != node->rank())
+ return nullptr;
+
+ std::vector<uint32_t> broadcast_dims;
+ for (uint32_t dim = 0; dim < node->rank(); ++dim)
+ {
+ if (node->dim(dim) == successor->dim(dim))
+ continue;
+
+ if (node->dim(dim) == 1)
+ broadcast_dims.push_back(dim);
+ }
+
+ if (broadcast_dims.size() != 1 || broadcast_dims.back() != node->rank() - 1)
+ {
+ WARN(l) << "NYI: Only depth broadcast removal is supported";
+ return nullptr;
+ }
+
+ auto constant = node->graph()->nodes()->create<luci::CircleConst>();
+ constant->name(node->name());
+ constant->dtype(node->dtype());
+ constant->rank(node->rank());
+ constant->shape_status(luci::ShapeStatus::VALID);
+
+ uint32_t node_size = node->size<loco::DataType::FLOAT32>();
+ uint32_t constant_size = 1;
+ for (uint32_t i = 0; i < successor->rank(); ++i)
+ {
+ constant->dim(i).set(successor->dim(i).value());
+ constant_size *= constant->dim(i).value();
+ }
+ constant->size<loco::DataType::FLOAT32>(constant_size);
+
+ auto const node_data = &node->at<loco::DataType::FLOAT32>(0);
+ auto const constant_data = &constant->at<loco::DataType::FLOAT32>(0);
+
+ auto const successor_depth = successor->dim(successor->rank() - 1).value();
+ for (uint32_t d = 0; d < successor_depth; ++d)
+ std::copy(node_data, node_data + node_size, constant_data + d * node_size);
+
+ return constant;
+}
+
+template <typename N> bool expand_node_input(luci::CircleConst *node, luci::CircleNode *successor)
+{
+ static_assert(std::is_base_of<luci::CircleNode, N>::value,
+ "Successor node should have CircleNode base");
+
+ auto const successor_node = loco::must_cast<N *>(successor);
+ auto const successor_x = loco::must_cast<luci::CircleNode *>(successor_node->x());
+ auto const successor_y = loco::must_cast<luci::CircleNode *>(successor_node->y());
+
+ luci::CircleConst *expanded_const;
+
+ if (node == successor_x)
+ {
+ expanded_const = create_expanded_constant(node, successor_y);
+
+ if (expanded_const == nullptr)
+ return false;
+
+ successor_node->x(expanded_const);
+ }
+ else if (node == successor_y)
+ {
+ expanded_const = create_expanded_constant(node, successor_x);
+
+ if (expanded_const == nullptr)
+ return false;
+
+ successor_node->y(expanded_const);
+ }
+
+ return true;
+}
+
+/**
+ * Expand constants following broadcasting rules for binary input nodes (Add, Mul, etc.)
+ *
+ * BEFORE
+ *
+ * [CircleInput] [CircleConst (H x W x 1)]
+ * | |
+ * [CircleAdd]
+ *
+ * AFTER
+ *
+ * [CircleInput] [CircleConst (H x W x D)]
+ * | |
+ * [CircleAdd]
+ */
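+// NOTE As implemented in create_expanded_constant, only a broadcast over the last
+// (depth) dimension, i.e. (... x 1) expanded to (... x D), is handled; other
+// broadcast patterns are left untouched.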
+bool expand_broadcast_const(luci::CircleConst *node)
+{
+ if (node->dtype() != loco::DataType::FLOAT32)
+ return false; // Unsupported data type
+
+ bool changed = false;
+
+ for (auto successor : loco::succs(node))
+ {
+ auto const circle_successor = loco::must_cast<luci::CircleNode *>(successor);
+ switch (circle_successor->opcode())
+ {
+ case luci::CircleOpcode::ADD:
+ if (expand_node_input<luci::CircleAdd>(node, circle_successor))
+ changed = true;
+ break;
+ case luci::CircleOpcode::MUL:
+ if (expand_node_input<luci::CircleMul>(node, circle_successor))
+ changed = true;
+ break;
+ case luci::CircleOpcode::DIV:
+ if (expand_node_input<luci::CircleDiv>(node, circle_successor))
+ changed = true;
+ break;
+ default:
+ break; // Unsupported successor node
+ }
+ }
+
+ return changed;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Broadcast expanding for Const nodes
+ **/
+bool ExpandBroadcastConstPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto const_node = dynamic_cast<luci::CircleConst *>(node);
+ if (const_node == nullptr)
+ continue;
+
+ if (expand_broadcast_const(const_node))
+ changed = true;
+ }
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ExpandBroadcastConstPass.test.cpp b/compiler/luci/pass/src/ExpandBroadcastConstPass.test.cpp
new file mode 100644
index 000000000..0734e0778
--- /dev/null
+++ b/compiler/luci/pass/src/ExpandBroadcastConstPass.test.cpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ExpandBroadcastConstPass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class ExpandBroadcastConstTest : public ::testing::Test
+{
+public:
+ ExpandBroadcastConstTest()
+ {
+ _x = _g.nodes()->create<luci::CircleInput>();
+ _y = _g.nodes()->create<luci::CircleConst>();
+ _add = _g.nodes()->create<luci::CircleAdd>();
+ _output = _g.nodes()->create<luci::CircleOutput>();
+
+ auto graph_input = _g.inputs()->create();
+ graph_input->dtype(loco::DataType::FLOAT32);
+ graph_input->shape({1, H, W, D});
+ _x->index(graph_input->index());
+ _x->dtype(graph_input->dtype());
+ _x->shape({1, H, W, D});
+
+ auto graph_output = _g.outputs()->create();
+ graph_output->dtype(loco::DataType::FLOAT32);
+ graph_output->shape({1, H, W, D});
+ _output->index(graph_output->index());
+ _output->dtype(graph_output->dtype());
+ _output->shape({1, H, W, D});
+
+ _y->dtype(loco::DataType::FLOAT32);
+ _y->shape({1, H, W, 1});
+ _y->size<loco::DataType::FLOAT32>(16);
+
+ _add->dtype(loco::DataType::FLOAT32);
+ _add->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _add->x(_x);
+ _add->y(_y);
+ _add->shape({1, H, W, D});
+
+ _output->from(_add);
+
+ _x->name("input");
+ _output->name("output");
+ }
+
+protected:
+ uint32_t const H = 4;
+ uint32_t const W = 4;
+ uint32_t const D = 3;
+
+protected:
+ loco::Graph _g;
+ luci::CircleAdd *_add = nullptr;
+ luci::CircleInput *_x = nullptr;
+ luci::CircleConst *_y = nullptr;
+ luci::CircleOutput *_output = nullptr;
+};
+
+} // namespace
+
+TEST_F(ExpandBroadcastConstTest, name)
+{
+ luci::ExpandBroadcastConstPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(ExpandBroadcastConstTest, remove_broadcast)
+{
+ for (uint32_t i = 0; i < H * W; ++i)
+ _y->at<loco::DataType::FLOAT32>(i) = static_cast<float>(i);
+
+ luci::ExpandBroadcastConstPass pass;
+ ASSERT_TRUE(pass.run(&_g));
+
+ auto broadcasted_const = dynamic_cast<luci::CircleConst *>(_add->y());
+ ASSERT_NE(broadcasted_const, nullptr);
+
+ EXPECT_EQ(broadcasted_const->dtype(), loco::DataType::FLOAT32);
+ EXPECT_EQ(broadcasted_const->dim(1).value(), H);
+ EXPECT_EQ(broadcasted_const->dim(2).value(), W);
+ EXPECT_EQ(broadcasted_const->dim(3).value(), D);
+ EXPECT_EQ(broadcasted_const->size<loco::DataType::FLOAT32>(), H * W * D);
+
+ for (uint32_t i = 0; i < H * W; ++i)
+ {
+ for (uint32_t d = 0; d < D; ++d)
+ {
+ EXPECT_NEAR(broadcasted_const->at<loco::DataType::FLOAT32>(i + H * W * d),
+ static_cast<float>(i), std::numeric_limits<float>::min());
+ }
+ }
+}
+
+TEST_F(ExpandBroadcastConstTest, remove_broadcast_multiple_successors)
+{
+ auto const circle_sqrt = _g.nodes()->create<luci::CircleSqrt>();
+ circle_sqrt->dtype(loco::DataType::FLOAT32);
+ circle_sqrt->shape({1, H, W, 1});
+ circle_sqrt->x(_y);
+
+ luci::ExpandBroadcastConstPass pass;
+ ASSERT_TRUE(pass.run(&_g));
+
+ auto broadcasted_const = dynamic_cast<luci::CircleConst *>(_add->y());
+ auto original_const = dynamic_cast<luci::CircleConst *>(circle_sqrt->x());
+
+ ASSERT_NE(broadcasted_const, nullptr);
+ EXPECT_EQ(broadcasted_const->dtype(), loco::DataType::FLOAT32);
+ EXPECT_EQ(broadcasted_const->dim(3).value(), D);
+ EXPECT_EQ(broadcasted_const->size<loco::DataType::FLOAT32>(), H * W * D);
+
+ // Check if another successor's node was left intact
+ ASSERT_NE(original_const, nullptr);
+ EXPECT_EQ(original_const->dtype(), loco::DataType::FLOAT32);
+ EXPECT_EQ(original_const->dim(3).value(), 1);
+ EXPECT_EQ(original_const->size<loco::DataType::FLOAT32>(), H * W * 1);
+}
+
+TEST_F(ExpandBroadcastConstTest, broadcast_impossible_NEG)
+{
+ _y->shape({1, H, W, 2});
+ _y->size<loco::DataType::FLOAT32>(H * W * (D - 1));
+
+ luci::ExpandBroadcastConstPass pass;
+ ASSERT_FALSE(pass.run(&_g));
+}
diff --git a/compiler/luci/pass/src/FoldDepthwiseConv2DPass.cpp b/compiler/luci/pass/src/FoldDepthwiseConv2DPass.cpp
new file mode 100644
index 000000000..6e423e3d9
--- /dev/null
+++ b/compiler/luci/pass/src/FoldDepthwiseConv2DPass.cpp
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldDepthwiseConv2DPass.h"
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/AttrFusedActFunc.h>
+
+#include <luci/Log.h>
+
+namespace
+{
+
+// TODO Share activation min/max and compute_input/output code with luci-interpreter
+
+bool compute_output(uint32_t *output_size, luci::Padding padding, int32_t image_size,
+ int32_t filter_size, int32_t stride, int32_t dilation_rate)
+{
+ auto const effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+ switch (padding)
+ {
+ case luci::Padding::SAME:
+ *output_size = (image_size + stride - 1) / stride;
+ return true;
+
+ case luci::Padding::VALID:
+ *output_size = (image_size + stride - effective_filter_size) / stride;
+ return true;
+
+ default:
+ {
+ LOGGER(l);
+ WARN(l) << "Unsupported padding: " << uint32_t(padding);
+ return false;
+ }
+ }
+}
+
+uint32_t compute_padding(int32_t stride, int32_t dilation_rate, int32_t in_size,
+ int32_t filter_size, int32_t out_size)
+{
+ auto const effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+ auto const padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
+ return padding > 0 ? padding : 0;
+}
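+// Worked example (illustrative values, not taken from a specific model):
+// SAME padding, stride 1, dilation 1, a 4x4 image and a 3x3 filter give
+// output size = (4 + 1 - 1) / 1 = 4 and padding = ((4 - 1) * 1 + 3 - 4) / 2 = 1.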
+
+bool set_kernel_parameters(tflite::DepthwiseParams *params, luci::CircleDepthwiseConv2D *node,
+ uint32_t padding_height, uint32_t padding_width)
+{
+ switch (node->fusedActivationFunction())
+ {
+ case luci::FusedActFunc::NONE:
+ case luci::FusedActFunc::TANH:
+ params->float_activation_min = std::numeric_limits<float>::lowest();
+ params->float_activation_max = std::numeric_limits<float>::max();
+ break;
+ case luci::FusedActFunc::RELU:
+ params->float_activation_min = 0;
+ params->float_activation_max = std::numeric_limits<float>::max();
+ break;
+ case luci::FusedActFunc::RELU_N1_TO_1:
+ params->float_activation_min = -1;
+ params->float_activation_max = 1;
+ break;
+ case luci::FusedActFunc::RELU6:
+ params->float_activation_min = 0;
+ params->float_activation_max = 6;
+ break;
+ default:
+ {
+ LOGGER(l);
+ WARN(l) << "Unsupported activation: " << uint32_t(node->fusedActivationFunction());
+ return false;
+ }
+ }
+
+ params->stride_height = node->stride()->h();
+ params->stride_width = node->stride()->w();
+ params->dilation_height_factor = node->dilation()->h();
+ params->dilation_width_factor = node->dilation()->w();
+ params->depth_multiplier = node->depthMultiplier();
+
+ params->padding_values.height = padding_height;
+ params->padding_values.width = padding_width;
+
+ return true;
+}
+
+/**
+ * Fold DepthwiseConv2D with constant input and filter into a constant tensor
+ *
+ * BEFORE
+ *
+ * [CircleConst] [CircleConst]
+ * | |
+ * [CircleDepthwiseConv2D]
+ *
+ * AFTER
+ *
+ * [CircleConst]
+ */
+bool fold_depthwise_conv_2d(luci::CircleDepthwiseConv2D *node)
+{
+ LOGGER(l);
+
+ auto const input = dynamic_cast<luci::CircleConst *>(node->input());
+
+ if (input == nullptr)
+ return false; // Constant input is required for folding
+
+ auto const filter = dynamic_cast<luci::CircleConst *>(node->filter());
+
+ if (filter == nullptr)
+ return false; // Constant filter is required for folding
+
+ if (filter->dim(0).value() != 1)
+ return false; // Unsupported batch size
+
+ auto const bias = dynamic_cast<luci::CircleConst *>(node->bias());
+
+ if (bias == nullptr)
+ return false; // Constant bias is required for folding
+
+ auto const input_batches = input->dim(0).value();
+ auto const input_height = input->dim(1).value();
+ auto const input_width = input->dim(2).value();
+ auto const input_depth = input->dim(3).value();
+
+ auto const filter_height = filter->dim(1).value();
+ auto const filter_width = filter->dim(2).value();
+ auto const filter_channels_out = filter->dim(3).value();
+
+ if (filter_channels_out % input_depth != 0)
+ return false; // Wrong input/output depth ratio
+
+ if (node->depthMultiplier() != static_cast<int32_t>(filter_channels_out / input_depth))
+ return false; // Wrong depth multiplier value
+
+ if (bias->rank() != 1 || bias->dim(0).value() != filter_channels_out)
+ return false; // Unsupported bias value
+
+ uint32_t output_height = 0;
+ uint32_t output_width = 0;
+
+ if (!compute_output(&output_height, node->padding(), input_height, filter_height,
+ node->stride()->h(), node->dilation()->h()))
+ return false; // Unsupported output parameters
+
+ if (!compute_output(&output_width, node->padding(), input_width, filter_width,
+ node->stride()->w(), node->dilation()->w()))
+ return false; // Unsupported output parameters
+
+ auto const padding_height = compute_padding(node->stride()->h(), node->dilation()->h(),
+ input_height, filter_height, output_height);
+ auto const padding_width = compute_padding(node->stride()->w(), node->dilation()->w(),
+ input_width, filter_width, output_width);
+
+ tflite::DepthwiseParams params{};
+
+ if (!set_kernel_parameters(&params, node, padding_height, padding_width))
+ return false; // Unsupported kernel parameter values
+
+ auto constant = node->graph()->nodes()->create<luci::CircleConst>();
+ constant->name(node->name());
+ constant->dtype(node->dtype());
+ constant->rank(node->rank());
+ constant->shape_status(luci::ShapeStatus::VALID);
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ constant->dim(i).set(node->dim(i).value());
+
+ constant->size<loco::DataType::FLOAT32>(input_batches * output_height * output_width *
+ filter_channels_out);
+
+ auto const input_data = &input->at<loco::DataType::FLOAT32>(0);
+ auto const filter_data = &filter->at<loco::DataType::FLOAT32>(0);
+ auto const bias_data = &bias->at<loco::DataType::FLOAT32>(0);
+ auto const constant_data = &constant->at<loco::DataType::FLOAT32>(0);
+
+ auto tensor_shape = [](luci::CircleNode *node) {
+ tflite::RuntimeShape runtime_shape(node->rank());
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ runtime_shape.SetDim(i, node->dim(i).value());
+ return runtime_shape;
+ };
+
+ tflite::reference_ops::DepthwiseConv(params, tensor_shape(input), input_data,
+ tensor_shape(filter), filter_data, tensor_shape(bias),
+ bias_data, tensor_shape(constant), constant_data);
+
+ loco::replace(node).with(constant);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Constant Folding for DepthwiseConv2D Op
+ **/
+bool FoldDepthwiseConv2DPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto depthwise_conv2d = dynamic_cast<CircleDepthwiseConv2D *>(node);
+
+ if (depthwise_conv2d == nullptr)
+ continue;
+
+ switch (depthwise_conv2d->dtype())
+ {
+ case loco::DataType::FLOAT32:
+ changed = fold_depthwise_conv_2d(depthwise_conv2d);
+ break;
+ default:
+ break;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FoldDepthwiseConv2DPass.test.cpp b/compiler/luci/pass/src/FoldDepthwiseConv2DPass.test.cpp
new file mode 100644
index 000000000..b1ef56833
--- /dev/null
+++ b/compiler/luci/pass/src/FoldDepthwiseConv2DPass.test.cpp
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldDepthwiseConv2DPass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ * Graph has a DepthwiseConv2D Op with constant inputs
+ *
+ * BEFORE
+ *
+ * [CircleConst] [CircleConst]
+ * | |
+ * [CircleDepthwiseConv2D]
+ *
+ * AFTER
+ *
+ * [CircleConst]
+ */
+class FoldDepthwiseConv2DTest : public luci::ConstantFoldingTestGraph, public ::testing::Test
+{
+public:
+ FoldDepthwiseConv2DTest() : luci::ConstantFoldingTestGraph({1, 4, 4, 1}, loco::DataType::FLOAT32)
+ {
+ _dconv = _g.nodes()->create<luci::CircleDepthwiseConv2D>();
+ _dconv_input = _g.nodes()->create<luci::CircleConst>();
+ _dconv_filter = _g.nodes()->create<luci::CircleConst>();
+ _dconv_bias = _g.nodes()->create<luci::CircleConst>();
+
+ _dconv->dtype(loco::DataType::FLOAT32);
+ _dconv->padding(luci::Padding::VALID);
+ _dconv->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _dconv->input(_dconv_input);
+ _dconv->filter(_dconv_filter);
+ _dconv->bias(_dconv_bias);
+ _dconv->shape({1, 4, 4, 1});
+ _dconv->stride()->h(1);
+ _dconv->stride()->w(1);
+ _dconv->depthMultiplier(1);
+
+ _dconv_input->dtype(loco::DataType::FLOAT32);
+ _dconv_input->shape({1, 4, 4, 1});
+ _dconv_input->size<loco::DataType::FLOAT32>(16);
+
+ _dconv_filter->dtype(loco::DataType::FLOAT32);
+ _dconv_filter->shape({1, 1, 1, 1});
+ _dconv_filter->size<loco::DataType::FLOAT32>(1);
+
+ _dconv_bias->dtype(loco::DataType::FLOAT32);
+ _dconv_bias->shape({1});
+ _dconv_bias->size<loco::DataType::FLOAT32>(1);
+
+ _output->from(_dconv);
+ }
+
+protected:
+ void init() final {}
+
+protected:
+ loco::Node *createFoldedPattern() final { return nullptr; }
+
+protected:
+ luci::CircleConst *getFoldedPattern() final
+ {
+ return loco::must_cast<luci::CircleConst *>(_output->from());
+ }
+
+protected:
+ luci::CircleDepthwiseConv2D *_dconv = nullptr;
+ luci::CircleConst *_dconv_input = nullptr;
+ luci::CircleConst *_dconv_filter = nullptr;
+ luci::CircleConst *_dconv_bias = nullptr;
+};
+
+} // namespace
+
+TEST(FoldDepthwiseConv2DPass, name)
+{
+ luci::FoldDepthwiseConv2DPass pass;
+ auto const name = pass.name();
+ ASSERT_NE(nullptr, name);
+}
+
+TEST_F(FoldDepthwiseConv2DTest, fold_depthwise_conv2d)
+{
+ for (uint32_t i = 0; i < 16; ++i)
+ _dconv_input->at<loco::DataType::FLOAT32>(i) = 0.5;
+ _dconv_filter->at<loco::DataType::FLOAT32>(0) = 0.5;
+
+ luci::FoldDepthwiseConv2DPass pass;
+ ASSERT_TRUE(pass.run(&_g));
+
+ auto folded_const = getFoldedPattern();
+ EXPECT_EQ(folded_const->dtype(), loco::DataType::FLOAT32);
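+ // All inputs are 0.5, the single 1x1 filter value is 0.5 and the bias is left
+ // zero-initialized, so each folded output element is expected to be 0.25.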
+ EXPECT_NEAR(folded_const->at<loco::DataType::FLOAT32>(0), 0.25,
+ std::numeric_limits<float>::min());
+ EXPECT_NEAR(folded_const->at<loco::DataType::FLOAT32>(15), 0.25,
+ std::numeric_limits<float>::min());
+}
+
+TEST_F(FoldDepthwiseConv2DTest, fold_non_constant_NEG)
+{
+ _dconv->input(_input);
+
+ luci::FoldDepthwiseConv2DPass pass;
+ ASSERT_FALSE(pass.run(&_g));
+}
diff --git a/compiler/luci/pass/src/ForceQuantParamPass.cpp b/compiler/luci/pass/src/ForceQuantParamPass.cpp
new file mode 100644
index 000000000..32d482fc1
--- /dev/null
+++ b/compiler/luci/pass/src/ForceQuantParamPass.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ForceQuantParamPass.h"
+#include "luci/Profile/CircleNodeID.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Log.h>
+
+namespace luci
+{
+
+namespace
+{
+
+void set_qparam(luci::CircleNode *node, float scale, int64_t zp)
+{
+ assert(node); // FIX_CALLER_UNLESS
+
+ auto quantparam = std::make_unique<CircleQuantParam>();
+ quantparam->scale.push_back(scale);
+ quantparam->zerop.push_back(zp);
+
+ node->quantparam(std::move(quantparam));
+}
+
+} // namespace
+
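+// Overwrite the quantparam of each tensor listed in _tensors with the given
+// scale/zero-point. Any tensor name that is not found in the graph raises an
+// error, and the pass reports no change so that it runs only once.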
+bool ForceQuantParamPass::run(loco::Graph *g)
+{
+ LOGGER(l);
+ INFO(l) << "ForceQuantParamPass Start" << std::endl;
+
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto const cnode = loco::must_cast<CircleNode *>(node);
+ auto const name = cnode->name();
+ auto target = std::find(_tensors.begin(), _tensors.end(), name);
+ if (target == _tensors.end())
+ continue;
+
+ auto index = target - _tensors.begin();
+ auto scale = _scales[index];
+ auto zp = _zerops[index];
+ set_qparam(cnode, scale, zp);
+
+ _tensors.erase(_tensors.begin() + index);
+ _scales.erase(_scales.begin() + index);
+ _zerops.erase(_zerops.begin() + index);
+ }
+
+ if (_tensors.size() > 0)
+ {
+ std::string msg;
+ for (auto const &t : _tensors)
+ msg += "Tensor does not exist: " + t + ".\n";
+ msg += "Please check tensor name.\n";
+ throw std::runtime_error(msg);
+ }
+
+ INFO(l) << "ForceQuantParamPass End" << std::endl;
+ return false; // one time run
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ForceQuantParamPass.test.cpp b/compiler/luci/pass/src/ForceQuantParamPass.test.cpp
new file mode 100644
index 000000000..a9da7c25e
--- /dev/null
+++ b/compiler/luci/pass/src/ForceQuantParamPass.test.cpp
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ForceQuantParamPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using TensorVector = luci::ForceQuantParamPass::TensorVector;
+using ScaleVector = luci::ForceQuantParamPass::ScaleVector;
+using ZPVector = luci::ForceQuantParamPass::ZPVector;
+
+std::unique_ptr<luci::CircleQuantParam> make_qparam(float scale, int64_t zp)
+{
+ auto qparam = std::make_unique<luci::CircleQuantParam>();
+ qparam->scale.push_back(scale);
+ qparam->zerop.push_back(zp);
+
+ return std::move(qparam);
+}
+
+bool check_per_tensor_qparam(luci::CircleNode *node, float scale, int64_t zp)
+{
+ assert(node); // FIX_CALLER_UNLESS
+
+ auto qparam = node->quantparam();
+ if (qparam->scale.size() != 1)
+ return false;
+
+ if (qparam->scale[0] != scale)
+ return false;
+
+ if (qparam->zerop.size() != 1)
+ return false;
+
+ if (qparam->zerop[0] != zp)
+ return false;
+
+ return true;
+}
+
+/**
+ * Graph with a single input and a single output.
+ *
+ * [Input]
+ * |
+ * (graph body) -> implemented by insertGraphBody()
+ * |
+ * [Output]
+ *
+ */
+class SISOGraph
+{
+public:
+ SISOGraph() = default;
+
+public:
+ void init()
+ {
+ input = g.nodes()->create<luci::CircleInput>();
+ output = g.nodes()->create<luci::CircleOutput>();
+ input->name("input");
+ output->name("output");
+
+ auto graph_input = g.inputs()->create();
+ input->index(graph_input->index());
+ auto graph_output = g.outputs()->create();
+ output->index(graph_output->index());
+
+ graph_input->dtype(loco::DataType::U8);
+ input->dtype(loco::DataType::U8);
+ output->dtype(loco::DataType::U8);
+ graph_output->dtype(loco::DataType::U8);
+
+ input->quantparam(make_qparam(0.1, 11));
+ output->quantparam(make_qparam(0.2, 12));
+
+ uint32_t channel_size = 16;
+ graph_input->shape({1, channel_size, 4, 4});
+ input->shape({1, channel_size, 4, 4});
+ output->shape({1, channel_size, 4, 4});
+ graph_output->shape({1, channel_size, 4, 4});
+
+ auto graph_body = insertGraphBody(input);
+ output->from(graph_body);
+ }
+
+ virtual ~SISOGraph() = default;
+
+protected:
+ virtual loco::Node *insertGraphBody(loco::Node *input) = 0;
+
+public:
+ loco::Graph g;
+ luci::CircleInput *input = nullptr;
+ luci::CircleOutput *output = nullptr;
+};
+
+class AddGraph final : public SISOGraph
+{
+protected:
+ loco::Node *insertGraphBody(loco::Node *input) override
+ {
+ add = g.nodes()->create<luci::CircleAdd>();
+ beta = g.nodes()->create<luci::CircleConst>();
+
+ add->dtype(loco::DataType::U8);
+ beta->dtype(loco::DataType::U8);
+ add->quantparam(make_qparam(0.1, 11));
+ beta->quantparam(make_qparam(0.2, 12));
+
+ uint32_t channel_size = 16;
+ add->shape({1, 4, 4, channel_size});
+ beta->shape({1, 1, 1, channel_size});
+
+ beta->size<loco::DataType::U8>(channel_size);
+ for (uint32_t i = 0; i < channel_size; i++)
+ {
+ beta->at<loco::DataType::U8>(i) = i;
+ }
+
+ add->x(input);
+ add->y(beta);
+
+ add->name("add");
+ beta->name("beta");
+
+ return add;
+ }
+
+public:
+ luci::CircleAdd *add = nullptr;
+ luci::CircleConst *beta = nullptr;
+};
+
+} // namespace
+
+TEST(ForceQuantParamPassTest, simple)
+{
+ TensorVector tensors{"input", "add"};
+ ScaleVector scales{2.0, 3.0};
+ ZPVector zerops{4, 8};
+
+ luci::ForceQuantParamPass pass(tensors, scales, zerops);
+
+ AddGraph g;
+ g.init();
+
+ pass.run(&g.g);
+
+ EXPECT_TRUE(check_per_tensor_qparam(g.input, 2.0, 4));
+ EXPECT_TRUE(check_per_tensor_qparam(g.add, 3.0, 8));
+}
+
+TEST(ForceQuantParamPassTest, name_mismatch_NEG)
+{
+ TensorVector tensors{"no_exist"};
+ ScaleVector scales{2.0};
+ ZPVector zerops{4};
+
+ luci::ForceQuantParamPass pass(tensors, scales, zerops);
+
+ AddGraph g;
+ g.init();
+
+ EXPECT_THROW(pass.run(&g.g), std::runtime_error);
+}
diff --git a/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp b/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp
new file mode 100644
index 000000000..97a962cb6
--- /dev/null
+++ b/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseAddWithFullyConnectedPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+/**
+ * Fuse Add into FullyConnected if the added value is a channel-wise (last dimension) constant
+ *
+ * BEFORE
+ * |
+ * [CircleFullyConnected]
+ * |
+ * [CircleAdd]
+ * |
+ *
+ * AFTER
+ * |
+ * [CircleFullyConnected] [CircleAdd] (dead)
+ * |
+ *
+ */
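+// Note: the Add's constant operand becomes the new FC bias (summed with any
+// existing constant bias), and the Add's fused activation function is moved
+// onto the FC.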
+bool fuse_add_with_fc(luci::CircleFullyConnected *fc)
+{
+ if (not fc)
+ return false;
+
+ if (fc->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ if (fc->fusedActivationFunction() != luci::FusedActFunc::NONE)
+ return false;
+
+ auto weights = dynamic_cast<luci::CircleConst *>(fc->weights());
+ if (not weights)
+ return false;
+
+ // Get add node
+ auto fc_output = loco::succs(fc);
+ if (fc_output.size() != 1)
+ return false;
+
+ auto add = dynamic_cast<luci::CircleAdd *>(*fc_output.begin());
+ if (not add)
+ return false;
+ if (add->dtype() != loco::DataType::FLOAT32)
+ return false;
+
+ // Get addition
+ auto addition = add->x() == fc ? dynamic_cast<luci::CircleConst *>(add->y())
+ : dynamic_cast<luci::CircleConst *>(add->x());
+
+ // Non-const addition
+ if (not addition)
+ return false;
+
+ auto rank = addition->rank();
+ // TODO Support scalar addition
+ if (rank == 0)
+ return false;
+
+ for (uint32_t i = 0; i < rank - 1; i++)
+ {
+ if (addition->dim(i).value() != 1)
+ return false;
+ }
+ // Check that the last dimension of addition is the same as the number of neurons of FC
+ if (not(addition->dim(rank - 1) == weights->dim(0)))
+ return false;
+
+ auto fused_bias = luci::clone(addition);
+
+ // Add existing bias values
+ if (auto const_bias = dynamic_cast<luci::CircleConst *>(fc->bias()))
+ {
+ assert(const_bias->dtype() == loco::DataType::FLOAT32);
+
+ auto bias_size = fused_bias->size<loco::DataType::FLOAT32>();
+ assert(bias_size == const_bias->size<loco::DataType::FLOAT32>());
+ for (uint32_t i = 0; i < bias_size; i++)
+ fused_bias->at<loco::DataType::FLOAT32>(i) += const_bias->at<loco::DataType::FLOAT32>(i);
+ }
+
+ fc->bias(fused_bias);
+ fc->fusedActivationFunction(add->fusedActivationFunction());
+
+ // set origin
+ luci::add_origin(fc, luci::get_origin(add));
+
+ replace(add).with(fc);
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FuseAddWithFullyConnectedPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ auto fc = dynamic_cast<luci::CircleFullyConnected *>(node);
+ if (not fc)
+ continue;
+
+ if (fuse_add_with_fc(fc))
+ changed = true;
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.test.cpp b/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.test.cpp
new file mode 100644
index 000000000..4cc2eb599
--- /dev/null
+++ b/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.test.cpp
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseAddWithFullyConnectedPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+// TODO Reduce code duplication with ResolveCustomOpMatMulPass.cpp
+template <typename T>
+luci::CircleConst *create_const_node(loco::Graph *g, const loco::DataType dtype,
+ const std::vector<uint32_t> &shape,
+ const std::vector<T> &values)
+{
+ auto node = g->nodes()->create<luci::CircleConst>();
+ node->dtype(dtype);
+ node->rank(shape.size());
+
+ uint32_t size = 1;
+ for (uint32_t i = 0; i < shape.size(); ++i)
+ {
+ node->dim(i) = shape.at(i);
+ size *= shape.at(i);
+ }
+ node->shape_status(luci::ShapeStatus::VALID);
+
+#define INIT_VALUES(DT) \
+ { \
+ node->size<DT>(size); \
+ for (uint32_t i = 0; i < values.size(); ++i) \
+ node->at<DT>(i) = values[i]; \
+ }
+
+ switch (dtype)
+ {
+ case loco::DataType::U8:
+ INIT_VALUES(loco::DataType::U8);
+ break;
+ case loco::DataType::S16:
+ INIT_VALUES(loco::DataType::S16);
+ break;
+ case loco::DataType::S32:
+ INIT_VALUES(loco::DataType::S32);
+ break;
+ case loco::DataType::FLOAT32:
+ INIT_VALUES(loco::DataType::FLOAT32)
+ break;
+ default:
+ INTERNAL_EXN("create_const_node called with unsupported type");
+ break;
+ }
+ return node;
+}
+
+/**
+ * Simple graph for test
+ *
+ * BEFORE
+ *
+ * [FC]
+ * |
+ * [Add w/ Relu]
+ *
+ * AFTER
+ *
+ * [FC w/ Relu] (bias updated)
+ *
+ */
+class FCAddGraphlet
+{
+public:
+ FCAddGraphlet() = default;
+
+ void init(loco::Graph *g)
+ {
+ std::vector<float> weights_val(16 * 4);
+ _fc_f = create_const_node(g, loco::DataType::FLOAT32, {16, 4}, weights_val);
+
+ std::vector<float> bias_val(16);
+ _fc_b = create_const_node(g, loco::DataType::FLOAT32, {1, 16}, bias_val);
+
+ _fc = g->nodes()->create<luci::CircleFullyConnected>();
+ _fc->weights(_fc_f);
+ _fc->bias(_fc_b);
+ _fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+ _fc->dtype(loco::DataType::FLOAT32);
+ _fc->shape({1, 16});
+ _fc->name("fc");
+
+ std::vector<float> addition_val;
+ for (uint32_t i = 0; i < 16; i++)
+ addition_val.push_back(static_cast<float>(i));
+ _add_c = create_const_node(g, loco::DataType::FLOAT32, {1, 16}, addition_val);
+
+ _add = g->nodes()->create<luci::CircleAdd>();
+ _add->x(_fc);
+ _add->y(_add_c);
+ _add->fusedActivationFunction(luci::FusedActFunc::RELU);
+ _add->dtype(loco::DataType::FLOAT32);
+ _add->shape({1, 16});
+ _add->name("add");
+ }
+
+public:
+ luci::CircleFullyConnected *fc() { return _fc; }
+
+protected:
+ luci::CircleFullyConnected *_fc = nullptr;
+ luci::CircleAdd *_add = nullptr;
+ luci::CircleConst *_fc_f = nullptr;
+ luci::CircleConst *_fc_b = nullptr;
+ luci::CircleConst *_add_c = nullptr;
+};
+
+class FuseAddWithFCTestGraph : public TestIOGraph, public FCAddGraphlet
+{
+public:
+ FuseAddWithFCTestGraph() = default;
+
+ void init(void)
+ {
+ TestIOGraph::init({1, 4}, {1, 16});
+ FCAddGraphlet::init(g());
+
+ _fc->input(input());
+
+ output()->from(_add);
+ }
+};
+
+class FuseAddWithFullyConnectedPassTest : public ::testing::Test
+{
+public:
+ FuseAddWithFCTestGraph g;
+ luci::FuseAddWithFullyConnectedPass pass;
+};
+
+} // namespace
+
+TEST_F(FuseAddWithFullyConnectedPassTest, simple_test)
+{
+ g.init();
+
+ auto ret = pass.run(g.g());
+ EXPECT_EQ(true, ret);
+
+ auto fc = dynamic_cast<luci::CircleFullyConnected *>(g.output()->from());
+ EXPECT_NE(nullptr, fc);
+
+ auto bias = loco::must_cast<luci::CircleConst *>(g.fc()->bias());
+ for (uint32_t i = 0; i < bias->size<loco::DataType::FLOAT32>(); i++)
+ {
+ EXPECT_EQ(i, bias->at<loco::DataType::FLOAT32>(i));
+ }
+}
diff --git a/compiler/luci/pass/src/PropagateQuantParamPass.cpp b/compiler/luci/pass/src/PropagateQuantParamPass.cpp
index 10c113574..b1cb7a418 100644
--- a/compiler/luci/pass/src/PropagateQuantParamPass.cpp
+++ b/compiler/luci/pass/src/PropagateQuantParamPass.cpp
@@ -73,7 +73,13 @@ struct PropagateQuantParam final : public luci::CircleNodeMutableVisitor<bool>
return copy_qparam(input_node, node);
}
- // TODO : Add more Ops (e.g., Transpose)
+ bool visit(luci::CircleTranspose *node)
+ {
+ auto input_node = loco::must_cast<luci::CircleNode *>(node->a());
+ return copy_qparam(input_node, node);
+ }
+
+ // TODO : Add more Ops (e.g., layout-changing Ops)
};
} // namespace
diff --git a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp
index e99c7b389..c8ad87e3d 100644
--- a/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp
+++ b/compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp
@@ -358,7 +358,7 @@ bool QuantizeDequantizeWeightsPass::run(loco::Graph *g)
// Quantize weights
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
- QuantizeDequantizeWeights qw(_input_dtype, _output_dtype, _granularity);
+ QuantizeDequantizeWeights qw(_input_model_dtype, _output_model_dtype, _granularity);
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
circle_node->accept(&qw);
}
diff --git a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
index 6afc2084f..be81732f8 100644
--- a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
+++ b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
@@ -609,6 +609,20 @@ struct QuantizeSpecialActivation final : public luci::CircleNodeMutableVisitor<v
set_act_qparam(node, i_scale, i_zp);
}
+ void visit(luci::CircleSplitVOut *node)
+ {
+ auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input());
+ auto input = loco::must_cast<luci::CircleNode *>(splitv->input());
+ auto i_qparam = input->quantparam();
+ assert(i_qparam);
+ assert(i_qparam->scale.size() == 1); // FIX_CALLER_UNLESS
+ assert(i_qparam->zerop.size() == 1); // FIX_CALLER_UNLESS
+ auto i_scale = i_qparam->scale[0];
+ auto i_zp = i_qparam->zerop[0];
+
+ set_act_qparam(node, i_scale, i_zp);
+ }
+
void visit(luci::CircleUnpackOut *node)
{
auto unpack = loco::must_cast<luci::CircleUnpack *>(node->input());
@@ -1157,6 +1171,7 @@ void quantize_const_inputs(luci::CircleNode *node, loco::DataType output_type)
case luci::CircleOpcode::REVERSE_SEQUENCE:
case luci::CircleOpcode::SLICE:
case luci::CircleOpcode::SPACE_TO_BATCH_ND:
+ case luci::CircleOpcode::SPLIT_V:
case luci::CircleOpcode::STRIDED_SLICE:
case luci::CircleOpcode::SUM:
case luci::CircleOpcode::TILE:
@@ -1176,6 +1191,7 @@ void quantize_const_inputs(luci::CircleNode *node, loco::DataType output_type)
case luci::CircleOpcode::DIV:
case luci::CircleOpcode::ELU:
case luci::CircleOpcode::EQUAL:
+ case luci::CircleOpcode::EXP:
case luci::CircleOpcode::FLOOR:
case luci::CircleOpcode::FLOOR_DIV:
case luci::CircleOpcode::GREATER:
@@ -1385,7 +1401,8 @@ void propagate_pad_v2_quantparam(luci::CirclePadV2 *pad_v2, loco::DataType quant
auto pad_v2_input = loco::must_cast<luci::CircleNode *>(pad_v2->arg(0));
overwrite_quantparam(pad_v2_input, pad_v2);
- auto const_value_node = dynamic_cast<luci::CircleConst *>(pad_v2->arg(2));
+ auto const_value_node = loco::must_cast<luci::CircleConst *>(
+ pad_v2->arg(2)); // FIX ignore_pad_v2_const_quantization UNLESS
auto new_const = luci::clone(const_value_node);
const auto pad_v2_input_qparam = pad_v2_input->quantparam();
@@ -1458,7 +1475,7 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g)
// Quantize activation
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
- QuantizeActivation qa(_input_dtype, _output_dtype);
+ QuantizeActivation qa(_input_model_dtype, _output_model_dtype);
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
circle_node->accept(&qa);
}
@@ -1466,7 +1483,7 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g)
// Quantize weights
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
- QuantizeWeights qw(_input_dtype, _output_dtype, _granularity);
+ QuantizeWeights qw(_input_model_dtype, _output_model_dtype, _granularity);
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
circle_node->accept(&qw);
}
@@ -1474,7 +1491,7 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g)
// Quantize bias
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
- QuantizeBias qb(_input_dtype, _output_dtype, _granularity);
+ QuantizeBias qb(_input_model_dtype, _output_model_dtype, _granularity);
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
circle_node->accept(&qb);
}
@@ -1491,20 +1508,20 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g)
// (2) concat has no fused activation function
// (3) the input is not concatenation Op
// (4) the input is not produced to Ops other than concat
- propagate_concat_quantparam(concat, _output_dtype);
+ propagate_concat_quantparam(concat, _output_model_dtype);
}
// Quantize const inputs other than weights and bias
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
- quantize_const_inputs(circle_node, _output_dtype);
+ quantize_const_inputs(circle_node, _output_model_dtype);
}
// Update qparam of output of special Ops
for (auto node : loco::active_nodes(loco::output_nodes(g)))
{
- QuantizeSpecialActivation qsa(_input_dtype, _output_dtype);
+ QuantizeSpecialActivation qsa(_input_model_dtype, _output_model_dtype);
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
circle_node->accept(&qsa);
}
@@ -1514,11 +1531,11 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g)
for (auto node : loco::output_nodes(g))
{
auto circle_node = loco::must_cast<luci::CircleOutput *>(node);
- if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _output_dtype)
+ if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _output_model_dtype)
{
- circle_node->dtype(_output_dtype);
+ circle_node->dtype(_output_model_dtype);
auto graph_output = graph_outputs->at(circle_node->index());
- graph_output->dtype(_output_dtype);
+ graph_output->dtype(_output_model_dtype);
}
}
diff --git a/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp b/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp
index b8cc09955..3a6d86c33 100644
--- a/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp
+++ b/compiler/luci/pass/src/QuantizedModelVerifier.test.cpp
@@ -189,6 +189,12 @@ void set_minmax_to_non_const(loco::Graph *g, float min, float max)
if (split_node != nullptr)
continue;
+ // Min/Max is not recorded for SplitV
+ // See MinMaxObserver.cpp in record_minmax module
+ auto splitv_node = dynamic_cast<luci::CircleSplitV *>(node);
+ if (splitv_node != nullptr)
+ continue;
+
auto circle_node = loco::must_cast<luci::CircleNode *>(node);
auto qparam = std::make_unique<luci::CircleQuantParam>();
{
@@ -410,6 +416,38 @@ private:
luci::CircleConst *_split_dim = nullptr;
};
+class SplitVTestGraph final : public luci::test::TestIOGraph
+{
+public:
+ void init(void)
+ {
+ TestIOGraph::init({1, 32}, {32});
+ _size_splits = create_dummy_const<Type::S32>(g(), {1});
+ _split_dim = create_dummy_const<Type::S32>(g(), {1});
+ _splitv = g()->nodes()->create<luci::CircleSplitV>();
+ {
+ _splitv->input(input());
+ _splitv->size_splits(_size_splits);
+ _splitv->split_dim(_split_dim);
+ }
+ _splitv_o1 = g()->nodes()->create<luci::CircleSplitVOut>();
+ {
+ _splitv_o1->input(_splitv);
+ _splitv_o1->index(0);
+ }
+
+ output()->from(_splitv_o1);
+
+ set_minmax_to_non_const(g(), -1, 1);
+ }
+
+private:
+ luci::CircleSplitV *_splitv = nullptr;
+ luci::CircleSplitVOut *_splitv_o1 = nullptr;
+ luci::CircleConst *_size_splits = nullptr;
+ luci::CircleConst *_split_dim = nullptr;
+};
+
class StridedSliceTestGraph final : public SimpleTestGraph
{
public:
@@ -1312,6 +1350,30 @@ TEST(QuantizedModelVerifierTest, Split_wrong_granularity_NEG)
SUCCEED();
}
+TEST(QuantizedModelVerifierTest, SplitV)
+{
+ TEST_WITH_GRAPH(SplitVTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_GRAPH(SplitVTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_GRAPH(SplitVTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SplitV_wrong_type_NEG)
+{
+ TEST_WITH_WRONG_TYPE(SplitVTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SplitVTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+ TEST_WITH_WRONG_TYPE(SplitVTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+ SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SplitV_wrong_granularity_NEG)
+{
+ TEST_WITH_WRONG_GRANULARITY(SplitVTestGraph, Type::U8, Granularity::LayerWise);
+ TEST_WITH_WRONG_GRANULARITY(SplitVTestGraph, Type::U8, Granularity::ChannelWise);
+ TEST_WITH_WRONG_GRANULARITY(SplitVTestGraph, Type::S16, Granularity::ChannelWise);
+ SUCCEED();
+}
+
TEST(QuantizedModelVerifierTest, StridedSlice)
{
TEST_WITH_GRAPH(StridedSliceTestGraph, Type::U8, Granularity::LayerWise);
diff --git a/compiler/luci/pass/src/ResolveCustomOpAddPass.cpp b/compiler/luci/pass/src/ResolveCustomOpAddPass.cpp
index 1737e5dd6..9f7e2f17d 100644
--- a/compiler/luci/pass/src/ResolveCustomOpAddPass.cpp
+++ b/compiler/luci/pass/src/ResolveCustomOpAddPass.cpp
@@ -16,12 +16,12 @@
#include "luci/Pass/ResolveCustomOpAddPass.h"
-#include "flatbuffers/flexbuffers.h"
-
#include <luci/IR/CircleNodes.h>
#include <luci/IR/AttrFusedActFunc.h>
#include <luci/Profile/CircleNodeOrigin.h>
+#include <flatbuffers/flexbuffers.h>
+
namespace
{
diff --git a/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.cpp b/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.cpp
index 5e9466a63..7ebd7a429 100644
--- a/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.cpp
+++ b/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.cpp
@@ -16,11 +16,11 @@
#include "luci/Pass/ResolveCustomOpBatchMatMulPass.h"
-#include "flatbuffers/flexbuffers.h"
-
#include <luci/IR/CircleNodes.h>
#include <luci/Profile/CircleNodeOrigin.h>
+#include <flatbuffers/flexbuffers.h>
+
namespace
{
diff --git a/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.test.cpp b/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.test.cpp
index 435016f9d..7ef61c253 100644
--- a/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.test.cpp
+++ b/compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.test.cpp
@@ -18,12 +18,11 @@
#include <luci/IR/CircleNodes.h>
-#include "flatbuffers/flatbuffers.h"
-#include "flatbuffers/flexbuffers.h"
-
#include <luci/test/TestIOGraph.h>
#include <gtest/gtest.h>
+#include <flatbuffers/flatbuffers.h>
+#include <flatbuffers/flexbuffers.h>
namespace
{
diff --git a/compiler/luci/pass/src/ResolveCustomOpMatMulPass.cpp b/compiler/luci/pass/src/ResolveCustomOpMatMulPass.cpp
index 216778066..1e8f681c8 100644
--- a/compiler/luci/pass/src/ResolveCustomOpMatMulPass.cpp
+++ b/compiler/luci/pass/src/ResolveCustomOpMatMulPass.cpp
@@ -16,7 +16,6 @@
#include "luci/Pass/ResolveCustomOpMatMulPass.h"
-#include "flatbuffers/flexbuffers.h"
#include <loco/IR/DataTypeTraits.h>
#include <luci/IR/CircleNodes.h>
@@ -25,6 +24,8 @@
#include <loco.h>
#include <oops/InternalExn.h>
+#include <flatbuffers/flexbuffers.h>
+
namespace
{
diff --git a/compiler/luci/pass/src/ResolveCustomOpMaxPoolWithArgmaxPass.cpp b/compiler/luci/pass/src/ResolveCustomOpMaxPoolWithArgmaxPass.cpp
index d78a587ac..f37f27742 100644
--- a/compiler/luci/pass/src/ResolveCustomOpMaxPoolWithArgmaxPass.cpp
+++ b/compiler/luci/pass/src/ResolveCustomOpMaxPoolWithArgmaxPass.cpp
@@ -16,7 +16,6 @@
#include "luci/Pass/ResolveCustomOpMaxPoolWithArgmaxPass.h"
-#include "flatbuffers/flexbuffers.h"
#include <loco/IR/DataTypeTraits.h>
#include <luci/IR/CircleNodes.h>
@@ -25,6 +24,8 @@
#include <loco.h>
#include <oops/InternalExn.h>
+#include <flatbuffers/flexbuffers.h>
+
namespace
{
diff --git a/compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp b/compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp
new file mode 100644
index 000000000..9cba9a9e7
--- /dev/null
+++ b/compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/SubstituteSplitVToSplitPass.h"
+
+#include <loco.h>
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+
+void copy_quantparam(luci::CircleNode *dst, const luci::CircleNode *src)
+{
+ auto q = src->quantparam();
+ if (q == nullptr)
+ dst->quantparam(nullptr);
+ else
+ dst->quantparam(std::make_unique<luci::CircleQuantParam>(*q));
+}
+
+// SplitV is substituted with Split if the contents of size_splits are all the same
+// For example,
+// size_splits = [32, 32] -> substitute
+// size_splits = [31, 33] -> do not substitute
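+// Split divides its input into num_split equal parts along split_dim, so the
+// substitution is only valid when every entry of size_splits is identical.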
+bool resolve_splitv(luci::CircleSplitV *sv)
+{
+ auto size_splits = dynamic_cast<luci::CircleConst *>(sv->size_splits());
+ if (not size_splits)
+ return false;
+
+ if (size_splits->dtype() != loco::DataType::S32)
+ return false;
+
+ auto num_split = size_splits->size<loco::DataType::S32>();
+ if (static_cast<int32_t>(num_split) != sv->num_split())
+ return false;
+
+ if (num_split < 1)
+ return false;
+
+ // Check that the contents of size_splits are all the same
+ auto first_size = size_splits->at<loco::DataType::S32>(0);
+ for (uint32_t i = 1; i < num_split; i++)
+ {
+ if (first_size != size_splits->at<loco::DataType::S32>(i))
+ return false;
+ }
+
+ auto graph = sv->graph();
+ auto split_node = graph->nodes()->create<luci::CircleSplit>();
+ split_node->input(sv->input());
+ split_node->split_dim(sv->split_dim());
+ split_node->num_split(sv->num_split());
+ split_node->name(sv->name());
+ copy_quantparam(split_node, sv);
+ luci::add_origin(split_node, luci::get_origin(sv));
+
+ auto succs = loco::succs(sv);
+ for (auto succ : succs)
+ {
+ auto svo = loco::must_cast<luci::CircleSplitVOut *>(succ);
+ auto so_node = graph->nodes()->create<luci::CircleSplitOut>();
+ so_node->input(split_node);
+ so_node->index(svo->index());
+ so_node->name(svo->name());
+ copy_quantparam(so_node, svo);
+ luci::add_origin(so_node, luci::get_origin(svo));
+
+ replace(svo).with(so_node);
+ }
+
+ return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * EXAMPLE (SplitV with num_split = 2)
+ *
+ * BEFORE
+ * [CircleNode]
+ * |
+ * [CircleSplitV] (size_splits and split_dim are ignored)
+ * / \
+ * [CircleSplitVOut] [CircleSplitVOut]
+ * | |
+ * [CircleNode] [CircleNode]
+ *
+ * AFTER
+ * [CircleNode]
+ * / \
+ * [CircleSplit] [CircleSplitV] (dead)
+ * / \ \
+ * [CircleSplitOut] [CircleSplitOut] [CircleSplitVOut] * 2 (dead)
+ * | |
+ * [CircleNode] [CircleNode]
+ */
+bool SubstituteSplitVToSplitPass::run(loco::Graph *g)
+{
+ bool changed = false;
+ for (auto node : loco::active_nodes(loco::output_nodes(g)))
+ {
+ if (auto sv = dynamic_cast<luci::CircleSplitV *>(node))
+ {
+ if (resolve_splitv(sv))
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/SubstituteSplitVToSplitPass.test.cpp b/compiler/luci/pass/src/SubstituteSplitVToSplitPass.test.cpp
new file mode 100644
index 000000000..6e30103f9
--- /dev/null
+++ b/compiler/luci/pass/src/SubstituteSplitVToSplitPass.test.cpp
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/SubstituteSplitVToSplitPass.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+const int N = 1;
+const int C = 32;
+const int H = 8;
+const int W = 8;
+
+// Reduce code duplication with ResolveCustomOpMatMulPass.cpp
+template <typename T>
+luci::CircleConst *create_const_node(loco::Graph *g, const loco::DataType dtype,
+ const std::vector<uint32_t> &shape,
+ const std::vector<T> &values)
+{
+ auto node = g->nodes()->create<luci::CircleConst>();
+ node->dtype(dtype);
+ node->rank(shape.size());
+
+ uint32_t size = 1;
+ for (uint32_t i = 0; i < shape.size(); ++i)
+ {
+ node->dim(i) = shape.at(i);
+ size *= shape.at(i);
+ }
+ node->shape_status(luci::ShapeStatus::VALID);
+
+#define INIT_VALUES(DT) \
+ { \
+ node->size<DT>(size); \
+ for (uint32_t i = 0; i < values.size(); ++i) \
+ node->at<DT>(i) = values[i]; \
+ }
+
+ switch (dtype)
+ {
+ case loco::DataType::U8:
+ INIT_VALUES(loco::DataType::U8);
+ break;
+ case loco::DataType::S16:
+ INIT_VALUES(loco::DataType::S16);
+ break;
+ case loco::DataType::S32:
+ INIT_VALUES(loco::DataType::S32);
+ break;
+ case loco::DataType::FLOAT32:
+ INIT_VALUES(loco::DataType::FLOAT32)
+ break;
+ default:
+ INTERNAL_EXN("create_const_node called with unsupported type");
+ break;
+ }
+ return node;
+}
+/**
+ * Graph having a SplitV operator
+ *
+ * [CircleInput]
+ * |
+ * [CircleSplitV]
+ * / \
+ * [CircleSplitVOut] [CircleSplitVOut]
+ * | |
+ * [CircleOutput] [CircleOutput]
+ */
+class SplitVGraphlet
+{
+public:
+ SplitVGraphlet() = default;
+
+public:
+ void init(loco::Graph *g)
+ {
+ const std::vector<int32_t> splits{16, 16};
+ auto size_splits = create_const_node(g, loco::DataType::S32, {2}, splits);
+
+ const std::vector<int32_t> dim{3};
+ auto split_dim = create_const_node(g, loco::DataType::S32, {1}, dim);
+
+ _sv = g->nodes()->create<luci::CircleSplitV>();
+ _sv->size_splits(size_splits);
+ _sv->split_dim(split_dim);
+ _sv->num_split(2);
+ _sv->name("SplitV");
+
+ _svo1 = g->nodes()->create<luci::CircleSplitVOut>();
+ _svo1->input(_sv);
+ _svo1->index(0);
+ _svo1->name("SplitV0");
+
+ _svo2 = g->nodes()->create<luci::CircleSplitVOut>();
+ _svo2->input(_sv);
+ _svo2->index(1);
+ _svo2->name("SplitV1");
+ }
+
+public:
+ luci::CircleSplitV *split_v() { return _sv; }
+ luci::CircleSplitVOut *split_vo1() { return _svo1; }
+ luci::CircleSplitVOut *split_vo2() { return _svo2; }
+
+protected:
+ luci::CircleSplitV *_sv = nullptr;
+ luci::CircleSplitVOut *_svo1 = nullptr;
+ luci::CircleSplitVOut *_svo2 = nullptr;
+};
+
+class SplitVGraph : public TestIsGraphlet<1>, public TestOsGraphlet<2>, public SplitVGraphlet
+{
+public:
+ SplitVGraph() = default;
+
+ void init(void)
+ {
+ TestIsGraphlet<1>::init(g(), {{N, C, H, W}});
+ TestOsGraphlet<2>::init(g(), {{N, C, H / 2, W / 2}, {N, C, H / 2, W / 2}});
+ SplitVGraphlet::init(g());
+
+ split_v()->input(input(0));
+
+ output(0)->from(split_vo1());
+ output(1)->from(split_vo2());
+ }
+};
+
+class SubstituteSplitVToSplitPassTest : public ::testing::Test
+{
+public:
+ SplitVGraph g;
+ luci::SubstituteSplitVToSplitPass pass;
+};
+
+} // namespace
+
+/**
+ * Optimized graph looks like below.
+ *
+ * [CircleInput]
+ * |
+ * [CircleSplit]
+ * / \
+ * [CircleSplitOut] [CircleSplitOut]
+ * | |
+ * [CircleOutput] [CircleOutput]
+ */
+TEST_F(SubstituteSplitVToSplitPassTest, simple_test)
+{
+ g.init();
+
+ auto ret = pass.run(g.g());
+ EXPECT_EQ(true, ret);
+
+ auto so1 = dynamic_cast<luci::CircleSplitOut *>(g.output(0)->from());
+ EXPECT_NE(nullptr, so1);
+
+ auto so2 = dynamic_cast<luci::CircleSplitOut *>(g.output(1)->from());
+ EXPECT_NE(nullptr, so2);
+
+ EXPECT_EQ(so1->input(), so2->input());
+
+ auto s = dynamic_cast<luci::CircleSplit *>(so1->input());
+ EXPECT_NE(nullptr, s);
+
+ auto input = dynamic_cast<luci::CircleInput *>(s->input());
+ EXPECT_NE(nullptr, input);
+}
+
+TEST_F(SubstituteSplitVToSplitPassTest, wrong_condition_NEG)
+{
+ g.init();
+
+ g.split_v()->num_split(3); // Wrong num_split
+ auto ret = pass.run(g.g());
+
+ EXPECT_EQ(false, ret);
+}
diff --git a/compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp b/compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp
index 74be86a4c..f48763782 100644
--- a/compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp
+++ b/compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp
@@ -76,6 +76,18 @@ std::vector<uint32_t> node_shape(const luci::CircleNode *input)
}
/**
+ * @brief copy quantparam of src to dst
+ */
+void copy_quantparam(luci::CircleNode *dst, const luci::CircleNode *src)
+{
+ auto q = src->quantparam();
+ if (q == nullptr)
+ dst->quantparam(nullptr);
+ else
+ dst->quantparam(std::make_unique<luci::CircleQuantParam>(*q));
+}
+
+/**
* @brief return CircleConst ptr with values of new_shape
*/
luci::CircleConst *create_shape_const(loco::Graph *graph, const std::vector<uint32_t> &new_shape)
@@ -130,6 +142,7 @@ bool substitute_squeeze_to_reshape(luci::CircleSqueeze *squeeze)
auto graph = squeeze->graph();
auto reshape = graph->nodes()->create<luci::CircleReshape>();
auto shape_const = create_shape_const(graph, reshape_shape);
+ copy_quantparam(reshape, squeeze);
reshape->name(name + "/Reshape");
luci::add_origin(reshape, luci::get_origin(squeeze));
shape_const->name(name + "/Reshape/shape");
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeChannelWiseGranularity.h b/compiler/luci/pass/src/VerifyQuantizedNodeChannelWiseGranularity.h
index 1706b9e43..bf3ff2e8a 100644
--- a/compiler/luci/pass/src/VerifyQuantizedNodeChannelWiseGranularity.h
+++ b/compiler/luci/pass/src/VerifyQuantizedNodeChannelWiseGranularity.h
@@ -324,6 +324,19 @@ private:
return true;
}
+ bool visit(const luci::CircleSplitV *node)
+ {
+ // node's output is the input of CircleSplitVOut, thus not quantized
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSplitVOut *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ return true;
+ }
+
bool visit(const luci::CircleStridedSlice *node)
{
RETURN_FALSE_UNLESS(is_lwq(node));
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeLayerWiseGranularity.h b/compiler/luci/pass/src/VerifyQuantizedNodeLayerWiseGranularity.h
index 3954bf216..9bc8b31df 100644
--- a/compiler/luci/pass/src/VerifyQuantizedNodeLayerWiseGranularity.h
+++ b/compiler/luci/pass/src/VerifyQuantizedNodeLayerWiseGranularity.h
@@ -310,6 +310,19 @@ private:
return true;
}
+ bool visit(const luci::CircleSplitV *node)
+ {
+ // node's output is the input of CircleSplitVOut, thus not quantized
+ RETURN_FALSE_UNLESS(is_lwq(node->input()));
+ return true;
+ }
+
+ bool visit(const luci::CircleSplitVOut *node)
+ {
+ RETURN_FALSE_UNLESS(is_lwq(node));
+ return true;
+ }
+
bool visit(const luci::CircleStridedSlice *node)
{
RETURN_FALSE_UNLESS(is_lwq(node));
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeS16Type.h b/compiler/luci/pass/src/VerifyQuantizedNodeS16Type.h
index 560abd2ff..eeec7b82b 100644
--- a/compiler/luci/pass/src/VerifyQuantizedNodeS16Type.h
+++ b/compiler/luci/pass/src/VerifyQuantizedNodeS16Type.h
@@ -310,6 +310,26 @@ private:
return true;
}
+ bool visit(const luci::CircleSplitV *node)
+ {
+ // node's output is the input of CircleSplitVOut, thus not quantized
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+ return true;
+ }
+
+ bool visit(const luci::CircleSplitVOut *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+
+ // SplitVOut has the same qparam as the input of SplitV
+ auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input());
+ auto input = loco::must_cast<luci::CircleNode *>(splitv->input());
+ RETURN_FALSE_UNLESS(node->quantparam());
+ RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
+ RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
+ return true;
+ }
+
bool visit(const luci::CircleStridedSlice *node)
{
RETURN_FALSE_UNLESS(has_type(node, Type::S16))
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeU8Type.h b/compiler/luci/pass/src/VerifyQuantizedNodeU8Type.h
index 42cd1ce55..e7dd1b072 100644
--- a/compiler/luci/pass/src/VerifyQuantizedNodeU8Type.h
+++ b/compiler/luci/pass/src/VerifyQuantizedNodeU8Type.h
@@ -317,6 +317,26 @@ private:
return true;
}
+ bool visit(const luci::CircleSplitV *node)
+ {
+ // node's output is the input of CircleSplitVOut, thus not quantized
+ RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+ return true;
+ }
+
+ bool visit(const luci::CircleSplitVOut *node)
+ {
+ RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+
+ // SplitVOut has the same qparam as the input of SplitV
+ auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input());
+ auto input = loco::must_cast<luci::CircleNode *>(splitv->input());
+ RETURN_FALSE_UNLESS(node->quantparam());
+ RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
+ RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
+ return true;
+ }
+
bool visit(const luci::CircleStridedSlice *node)
{
RETURN_FALSE_UNLESS(has_type(node, Type::U8))
diff --git a/compiler/luci/plan/CMakeLists.txt b/compiler/luci/plan/CMakeLists.txt
new file mode 100644
index 000000000..9ca6dcb41
--- /dev/null
+++ b/compiler/luci/plan/CMakeLists.txt
@@ -0,0 +1,15 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+if (NOT LIBRARY_TYPE)
+ set(LIBRARY_TYPE "SHARED")
+endif(NOT LIBRARY_TYPE)
+
+add_library(luci_plan ${LIBRARY_TYPE} ${SOURCES})
+target_include_directories(luci_plan PRIVATE src)
+target_include_directories(luci_plan PUBLIC include)
+target_link_libraries(luci_plan PUBLIC loco)
+target_link_libraries(luci_plan PUBLIC luci_lang)
+
+install(TARGETS luci_plan DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+ FILES_MATCHING PATTERN "*.h")
diff --git a/compiler/luci/plan/include/luci/Plan/CircleNodeExecutionPlan.h b/compiler/luci/plan/include/luci/Plan/CircleNodeExecutionPlan.h
new file mode 100644
index 000000000..fe966e35e
--- /dev/null
+++ b/compiler/luci/plan/include/luci/Plan/CircleNodeExecutionPlan.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_NODE_EXECUTION_PLAN_H__
+#define __LUCI_CIRCLE_NODE_EXECUTION_PLAN_H__
+
+#include <luci/IR/CircleNode.h>
+
+#include <utility>
+
+namespace luci
+{
+
+class CircleNodeExecutionPlan
+{
+public:
+ CircleNodeExecutionPlan() = delete;
+
+ CircleNodeExecutionPlan(uint32_t order_in_plan, std::vector<uint32_t> offsets)
+ {
+ _order_in_plan = order_in_plan;
+ _offsets = std::move(offsets);
+ }
+
+ uint32_t order_in_plan(void) const { return _order_in_plan; }
+ void order_in_plan(const uint32_t &order_in_plan) { _order_in_plan = order_in_plan; }
+
+ std::vector<uint32_t> offsets(void) const { return _offsets; }
+ void offsets(const std::vector<uint32_t> &offsets) { _offsets = offsets; }
+
+private:
+ uint32_t _order_in_plan = 0;
+ std::vector<uint32_t> _offsets;
+};
+
+bool has_execution_plan(const luci::CircleNode *circle_node);
+
+void add_execution_plan(luci::CircleNode *circle_node,
+ const luci::CircleNodeExecutionPlan &execution_plan);
+
+luci::CircleNodeExecutionPlan get_execution_plan(const luci::CircleNode *circle_node);
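+// Illustrative usage (the node name below is a placeholder, not part of this header):
+//
+//   luci::CircleNodeExecutionPlan plan(/* order_in_plan */ 0, /* offsets */ {0, 1024});
+//   luci::add_execution_plan(node, plan);
+//   if (luci::has_execution_plan(node))
+//   {
+//     auto queried_plan = luci::get_execution_plan(node);
+//   }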
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_NODE_EXECUTION_PLAN_H__
diff --git a/compiler/luci/plan/src/CircleNodeExecutionPlan.cpp b/compiler/luci/plan/src/CircleNodeExecutionPlan.cpp
new file mode 100644
index 000000000..a02ebc452
--- /dev/null
+++ b/compiler/luci/plan/src/CircleNodeExecutionPlan.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Plan/CircleNodeExecutionPlan.h"
+
+#include <loco.h>
+
+#include <stdexcept>
+#include <utility>
+
+namespace
+{
+
+/**
+ * @brief Set annotation for circle node execution plan
+ * @note Once CircleExecutionPlanAnnotation is annotated, it should not be changed.
+ * If CircleExecutionPlanAnnotation is needed to be changed, create
+ * new CircleExecutionPlanAnnotation.
+ */
+class CircleExecutionPlanAnnotation final : public loco::NodeAnnotation
+{
+public:
+ CircleExecutionPlanAnnotation() = delete;
+
+ explicit CircleExecutionPlanAnnotation(luci::CircleNodeExecutionPlan execution_plan)
+ : _execution_plan{std::move(execution_plan)}
+ {
+ // Do nothing
+ }
+
+public:
+ const luci::CircleNodeExecutionPlan &execution_plan(void) const { return _execution_plan; }
+ // No setter
+
+private:
+ luci::CircleNodeExecutionPlan _execution_plan;
+};
+
+} // namespace
+
+namespace luci
+{
+
+bool has_execution_plan(const luci::CircleNode *circle_node)
+{
+ return circle_node->annot<CircleExecutionPlanAnnotation>() != nullptr;
+}
+
+void add_execution_plan(luci::CircleNode *circle_node,
+ const luci::CircleNodeExecutionPlan &execution_plan)
+{
+ circle_node->annot<CircleExecutionPlanAnnotation>(nullptr);
+ circle_node->annot(std::make_unique<CircleExecutionPlanAnnotation>(execution_plan));
+}
+
+luci::CircleNodeExecutionPlan get_execution_plan(const luci::CircleNode *circle_node)
+{
+ if (!has_execution_plan(circle_node))
+ throw std::runtime_error("Cannot find CircleExecutionPlanAnnotation");
+
+ return circle_node->annot<CircleExecutionPlanAnnotation>()->execution_plan();
+}
+
+} // namespace luci
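For reference, a minimal usage sketch (not part of this patch) of the new execution-plan annotation API; the node is assumed to come from an already loaded Circle module, and the order/offset values are placeholders:

#include <luci/Plan/CircleNodeExecutionPlan.h>

void annotate_node(luci::CircleNode *node)
{
  // attach: node is 3rd in the plan, its tensor lives at offset 1024
  luci::CircleNodeExecutionPlan plan(3, {1024});
  luci::add_execution_plan(node, plan);

  // read back: get_execution_plan() throws if no plan was attached
  if (luci::has_execution_plan(node))
  {
    auto read_back = luci::get_execution_plan(node);
    auto order = read_back.order_in_plan(); // 3
    auto offsets = read_back.offsets();     // {1024}
    (void)order;
    (void)offsets;
  }
}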
diff --git a/compiler/luci/profile/CMakeLists.txt b/compiler/luci/profile/CMakeLists.txt
index fdfcaf1de..ae604ab90 100644
--- a/compiler/luci/profile/CMakeLists.txt
+++ b/compiler/luci/profile/CMakeLists.txt
@@ -2,7 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
-add_library(luci_profile SHARED ${SOURCES})
+if (NOT LIBRARY_TYPE)
+ set(LIBRARY_TYPE "SHARED")
+endif(NOT LIBRARY_TYPE)
+
+add_library(luci_profile ${LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_profile PRIVATE src)
target_include_directories(luci_profile PUBLIC include)
target_link_libraries(luci_profile PUBLIC loco)
diff --git a/compiler/luci/requires.cmake b/compiler/luci/requires.cmake
index 687bf573a..3ccc58128 100644
--- a/compiler/luci/requires.cmake
+++ b/compiler/luci/requires.cmake
@@ -5,6 +5,7 @@ require("locop")
require("logo")
require("logo-core")
require("mio-circle")
+require("mio-tflite")
require("oops")
require("hermes")
require("hermes-std")
diff --git a/compiler/luci/service/CMakeLists.txt b/compiler/luci/service/CMakeLists.txt
index 781e6d6de..f48210b9c 100644
--- a/compiler/luci/service/CMakeLists.txt
+++ b/compiler/luci/service/CMakeLists.txt
@@ -2,7 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
file(GLOB_RECURSE TESTS "src/*.test.cpp")
list(REMOVE_ITEM SOURCES ${TESTS})
-add_library(luci_service SHARED ${SOURCES})
+if (NOT LIBRARY_TYPE)
+ set(LIBRARY_TYPE "SHARED")
+endif(NOT LIBRARY_TYPE)
+
+add_library(luci_service ${LIBRARY_TYPE} ${SOURCES})
target_include_directories(luci_service PRIVATE src)
target_include_directories(luci_service PUBLIC include)
target_link_libraries(luci_service PUBLIC luci_lang)
diff --git a/compiler/luci/service/src/CircleTypeInferenceRule.cpp b/compiler/luci/service/src/CircleTypeInferenceRule.cpp
index fade2cbd0..5f6d46f2b 100644
--- a/compiler/luci/service/src/CircleTypeInferenceRule.cpp
+++ b/compiler/luci/service/src/CircleTypeInferenceRule.cpp
@@ -314,8 +314,7 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
return input_type;
}
- // TODO support S16
- loco::DataType visit(const luci::CircleQuantize *) final { return loco::DataType::U8; }
+ loco::DataType visit(const luci::CircleQuantize *node) final { return luci::dtype_get(node); }
loco::DataType visit(const luci::CircleRange *node) final
{
diff --git a/compiler/mio-circle/CMakeLists.txt b/compiler/mio-circle/CMakeLists.txt
index 9c1126d6f..fa05ef0fa 100644
--- a/compiler/mio-circle/CMakeLists.txt
+++ b/compiler/mio-circle/CMakeLists.txt
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers QUIET)
+nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
if(NOT FlatBuffers_FOUND)
return()
diff --git a/compiler/mio-tflite/CMakeLists.txt b/compiler/mio-tflite/CMakeLists.txt
index 9ef2859b9..4660e4003 100644
--- a/compiler/mio-tflite/CMakeLists.txt
+++ b/compiler/mio-tflite/CMakeLists.txt
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers QUIET)
+nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
if(NOT FlatBuffers_FOUND)
message(STATUS "Build mio-tflite: FAILED (missing Flatbuffers)")
@@ -36,3 +36,13 @@ target_link_libraries(mio_tflite_example mio_tflite)
# TODO provide full tflite validation with runtime/interpreter
add_executable(mio_tflite_validate example.cpp)
target_link_libraries(mio_tflite_validate mio_tflite)
+
+nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.3.0 QUIET)
+
+if(NOT TensorFlowGEMMLowpSource_FOUND)
+ return()
+endif(NOT TensorFlowGEMMLowpSource_FOUND)
+
+add_library(mio_tflite_inc INTERFACE)
+target_include_directories(mio_tflite_inc SYSTEM INTERFACE "${TensorFlowSource_DIR}")
+target_include_directories(mio_tflite_inc SYSTEM INTERFACE "${TensorFlowGEMMLowpSource_DIR}")
diff --git a/compiler/mio-tflite260/CMakeLists.txt b/compiler/mio-tflite260/CMakeLists.txt
new file mode 100644
index 000000000..39f4d9a31
--- /dev/null
+++ b/compiler/mio-tflite260/CMakeLists.txt
@@ -0,0 +1,49 @@
+nnas_find_package(FlatBuffers EXACT 1.12 QUIET)
+
+if(NOT FlatBuffers_FOUND)
+ message(STATUS "Build mio-tflite260: FAILED (missing Flatbuffers 1.12)")
+ return()
+endif(NOT FlatBuffers_FOUND)
+
+nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
+
+if(NOT TensorFlowSource_FOUND)
+ message(STATUS "Build mio-tflite260: FAILED (missing TensorFlowSource 2.6.0)")
+ return()
+endif(NOT TensorFlowSource_FOUND)
+
+message(STATUS "Build mio-tflite260: TRUE")
+
+set(SCHEMA_FILE "${TensorFlowSource_DIR}/tensorflow/lite/schema/schema.fbs")
+
+# NOTE Use a copy of schema.fbs to provide a unified way for circle as well
+add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs"
+ COMMAND ${CMAKE_COMMAND} -E copy "${SCHEMA_FILE}" schema.fbs
+ WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
+ DEPENDS "${SCHEMA_FILE}"
+)
+
+FlatBuffers_Target(mio_tflite260
+ OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen/mio/tflite"
+ INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
+ SCHEMA_DIR "${CMAKE_CURRENT_BINARY_DIR}"
+ SCHEMA_FILES "schema.fbs"
+)
+
+add_executable(mio_tflite260_example example.cpp)
+target_link_libraries(mio_tflite260_example mio_tflite260)
+
+# Temporary tflite validation tool to replace nnkit-tflite
+# TODO provide full tflite validation with runtime/interpreter
+add_executable(mio_tflite260_validate example.cpp)
+target_link_libraries(mio_tflite260_validate mio_tflite260)
+
+nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
+
+if(NOT TensorFlowGEMMLowpSource_FOUND)
+ return()
+endif(NOT TensorFlowGEMMLowpSource_FOUND)
+
+add_library(mio_tflite260_inc INTERFACE)
+target_include_directories(mio_tflite260_inc SYSTEM INTERFACE "${TensorFlowSource_DIR}")
+target_include_directories(mio_tflite260_inc SYSTEM INTERFACE "${TensorFlowGEMMLowpSource_DIR}")
diff --git a/compiler/mio-tflite260/README.md b/compiler/mio-tflite260/README.md
new file mode 100644
index 000000000..970569b47
--- /dev/null
+++ b/compiler/mio-tflite260/README.md
@@ -0,0 +1,3 @@
+# mio-tflite260
+
+_mio-tflite260_ provides a library to access TensorFlow Lite model files based on TensorFlow v2.6.0.
diff --git a/compiler/mio-tflite260/example.cpp b/compiler/mio-tflite260/example.cpp
new file mode 100644
index 000000000..2787a3c2d
--- /dev/null
+++ b/compiler/mio-tflite260/example.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// This example shows how to include and use "mio-tflite260"
+//
+#include <mio/tflite/schema_generated.h>
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+int main(int argc, char **argv)
+{
+ std::ifstream ifs(argv[1], std::ios_base::binary);
+ std::vector<char> buf(std::istreambuf_iterator<char>{ifs}, std::istreambuf_iterator<char>{});
+
+ flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(buf.data()), buf.size()};
+
+ if (!tflite::VerifyModelBuffer(verifier))
+ {
+ std::cout << "Fail" << std::endl;
+ return 255;
+ }
+
+ std::cout << "Pass" << std::endl;
+ return 0;
+}
diff --git a/compiler/mir/src/mir_tflite_importer/CMakeLists.txt b/compiler/mir/src/mir_tflite_importer/CMakeLists.txt
index 952857c86..42eb4f8a5 100644
--- a/compiler/mir/src/mir_tflite_importer/CMakeLists.txt
+++ b/compiler/mir/src/mir_tflite_importer/CMakeLists.txt
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers REQUIRED)
+nnas_find_package(FlatBuffers EXACT 1.10 REQUIRED)
if (NOT FlatBuffers_FOUND)
return()
diff --git a/compiler/one-cmds/CMakeLists.txt b/compiler/one-cmds/CMakeLists.txt
index fc89f4da5..729bfa80a 100644
--- a/compiler/one-cmds/CMakeLists.txt
+++ b/compiler/one-cmds/CMakeLists.txt
@@ -41,7 +41,6 @@ set(ONE_UTILITY_FILES
one-build.template.cfg
onecc.template.cfg
utils.py
- conv_mixin_1.8.0.patch
)
foreach(ONE_UTILITY IN ITEMS ${ONE_UTILITY_FILES})
diff --git a/compiler/one-cmds/conv_mixin_1.8.0.patch b/compiler/one-cmds/conv_mixin_1.8.0.patch
deleted file mode 100644
index 96a0f41cf..000000000
--- a/compiler/one-cmds/conv_mixin_1.8.0.patch
+++ /dev/null
@@ -1,11 +0,0 @@
---- a/onnx_tf/handlers/backend/conv_mixin.py
-+++ b/onnx_tf/handlers/backend/conv_mixin.py
-@@ -98,7 +98,7 @@
- depthwise = (x_rank == 4 and len(weight_shape) == 4 and group != 1 and
- not transpose and not (None in weight_shape))
- if depthwise and isinstance(x_shape, np.ndarray):
-- depthwise = group == x_shape[1]
-+ depthwise = bool(group == x_shape[1])
-
- if depthwise is True:
- # Depthwise convolution.
diff --git a/compiler/one-cmds/how-to-use-one-commands.txt b/compiler/one-cmds/how-to-use-one-commands.txt
index f86709489..0a0c4b14c 100644
--- a/compiler/one-cmds/how-to-use-one-commands.txt
+++ b/compiler/one-cmds/how-to-use-one-commands.txt
@@ -150,11 +150,14 @@ one-optimize provides network or operator transformation shown below.
Current transformation options are
- disable_validation : This will turn off operator validations.
+- expand_broadcast_const : This will expand broadcastable constant node inputs
- fold_add_v2 : This removes AddV2 operation which can be folded
- fold_cast : This removes Cast operation which can be folded
- fold_dequantize : This removes Dequantize operation which can be folded
+- fold_dwconv : This folds Depthwise Convolution operation which can be folded
- fold_sparse_to_dense : This removes SparseToDense operation which can be folded
- forward_reshape_to_unaryop: This will move Reshape after UnaryOp for centain condition
+- fuse_add_with_fully_connected: This fuses Add operator with the preceding FullyConnected operator if possible
- fuse_add_with_tconv: This fuses Add operator with the preceding TConv operator if possible
- fuse_batchnorm_with_conv : This fuses BatchNorm operator to convolution operator
- fuse_batchnorm_with_dwconv : This fuses BatchNorm operator to depthwise convolution operator
@@ -192,6 +195,8 @@ Current transformation options are
- shuffle_weight_to_16x1float32 : This will convert weight format of FullyConnected to SHUFFLED16x1FLOAT32.
Note that it only converts weights whose row is a multiple of 16.
- substitute_pack_to_reshape : This will convert single input Pack to Reshape.
+- substitute_padv2_to_pad : This will convert certain condition PadV2 to Pad.
+- substitute_splitv_to_split : This will convert certain condition SplitV to Split.
- substitute_squeeze_to_reshape : This will convert certain condition Squeeze to Reshape.
- substitute_strided_slice_to_reshape : This will convert certain condition StridedSlice to Reshape.
- substitute_transpose_to_reshape : This will convert certain condition Transpose to Reshape.
diff --git a/compiler/one-cmds/one-codegen b/compiler/one-cmds/one-codegen
index a496a54ec..726538d44 100644
--- a/compiler/one-cmds/one-codegen
+++ b/compiler/one-cmds/one-codegen
@@ -28,6 +28,7 @@ import os
import subprocess
import sys
import tempfile
+import shutil
import utils as _utils
@@ -49,6 +50,7 @@ def _get_backends_list():
The list where `one-codegen` finds its backends
- `bin` folder where `one-codegen` exists
- `backends` folder
+ - System path
NOTE If there are backends of the same name in different places,
the closer to the top in the list, the higher the priority.
@@ -151,6 +153,10 @@ def main():
if ntpath.basename(cand) == backend_base:
codegen_path = cand
if not codegen_path:
+ # Find backend from system path
+ codegen_path = shutil.which(backend_base)
+
+ if not codegen_path:
raise FileNotFoundError(backend_base + ' not found')
codegen_cmd = [codegen_path] + backend_args + unknown_args
if _utils._is_valid_attr(args, 'command'):
diff --git a/compiler/one-cmds/one-prepare-venv b/compiler/one-cmds/one-prepare-venv
index fbc3a75de..285191761 100644
--- a/compiler/one-cmds/one-prepare-venv
+++ b/compiler/one-cmds/one-prepare-venv
@@ -34,8 +34,8 @@ fi
# - https://github.com/onnx/onnx-tensorflow/blob/master/Versioning.md
VER_TENSORFLOW=2.3.0
-VER_ONNX=1.8.0
-VER_ONNX_TF=1.8.0
+VER_ONNX=1.10.1
+VER_ONNX_TF=1.9.0
# Install tensorflow
@@ -61,7 +61,7 @@ ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow-cpu==${VER_TENSORFLOW}
${VENV_PYTHON} -m pip ${PIP_OPTIONS} install Pillow==6.2.2
# Install PyTorch and ONNX related
-${VENV_PYTHON} -m pip ${PIP_OPTIONS} install torch==1.7.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install torch==1.8.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
# Provide install of custom onnx-tf
if [ -n "${EXT_ONNX_TF_WHL}" ]; then
@@ -69,23 +69,3 @@ if [ -n "${EXT_ONNX_TF_WHL}" ]; then
else
${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx==${VER_ONNX} onnx-tf==${VER_ONNX_TF}
fi
-
-# TODO remove this patch after onnx-tf next release
-# apply patch for DWConv conversion bug: https://github.com/onnx/onnx-tensorflow/pull/905
-if [[ -z "${EXT_ONNX_TF_WHL}" ]]; then
- PY_SITE_PACKAGES=$(${VENV_PYTHON} -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')
- if [[ -d ${PY_SITE_PACKAGES} ]]; then
- pushd ${PY_SITE_PACKAGES} > /dev/null
- PATCH_TARGET_FILE=onnx_tf/handlers/backend/conv_mixin.py
- if [[ -f "${PATCH_TARGET_FILE}" ]]; then
- # if patch is already applied, error code is 1
- # catch error code and check if this is the case
- set +e
- patch -t -N -p1 < ${DRIVER_PATH}/conv_mixin_1.8.0.patch
- ret_code=$?
- [[ $ret_code -gt 1 ]] && exit $ret_code
- set -e
- fi
- popd > /dev/null
- fi
-fi
diff --git a/compiler/one-cmds/one-profile b/compiler/one-cmds/one-profile
index 798cc756c..ed6d8bd7a 100644
--- a/compiler/one-cmds/one-profile
+++ b/compiler/one-cmds/one-profile
@@ -157,14 +157,7 @@ def main():
profile_cmd += getattr(args, 'command').split()
# run backend driver
- with subprocess.Popen(
- profile_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
- bufsize=1) as p:
- for line in p.stdout:
- sys.stdout.buffer.write(line)
- sys.stdout.buffer.flush()
- if p.returncode != 0:
- sys.exit(p.returncode)
+ _utils._run(profile_cmd, err_prefix=backend_base)
if __name__ == '__main__':
diff --git a/compiler/one-cmds/one-quantize b/compiler/one-cmds/one-quantize
index 25ef17ab1..cd623a6f8 100644
--- a/compiler/one-cmds/one-quantize
+++ b/compiler/one-cmds/one-quantize
@@ -88,6 +88,17 @@ def _get_parser():
type=str,
help='record mode (supported: percentile/moving_average, default=percentile)')
+ # arguments for force_quantparam
+ parser.add_argument(
+ '--force_quantparam',
+ action='store_true',
+ help='write quantparam to the specified tensor')
+ parser.add_argument(
+ '--tensor_name', type=str, action='append', help='tensor name (string)')
+ parser.add_argument('--scale', type=float, action='append', help='scale (float)')
+ parser.add_argument(
+ '--zero_point', type=int, action='append', help='zero point (int)')
+
return parser
@@ -114,8 +125,22 @@ def _verify_arg(parser, args):
missing.append('-i/--input_path')
if not _utils._is_valid_attr(args, 'output_path'):
missing.append('-o/--output_path')
+ if _utils._is_valid_attr(args, 'force_quantparam'):
+ if not _utils._is_valid_attr(args, 'tensor_name'):
+ missing.append('--tensor_name')
+ if not _utils._is_valid_attr(args, 'scale'):
+ missing.append('--scale')
+ if not _utils._is_valid_attr(args, 'zero_point'):
+ missing.append('--zero_point')
if len(missing):
parser.error('the following arguments are required: ' + ' '.join(missing))
+ if _utils._is_valid_attr(args, 'force_quantparam'):
+ tensors = getattr(args, 'tensor_name')
+ scales = getattr(args, 'scale')
+ zerops = getattr(args, 'zero_point')
+ if len(tensors) != len(scales) or len(tensors) != len(zerops):
+ parser.error(
+ 'The same number of tensor_name, scale, and zero_point should be given.')
def _parse_arg(parser):
@@ -128,6 +153,11 @@ def _parse_arg(parser):
def _quantize(args):
+ if _utils._is_valid_attr(args, 'force_quantparam'):
+ # write quantization parameters
+ _write_qparam(args)
+ return
+
# get file path to log
dir_path = os.path.dirname(os.path.realpath(__file__))
logfile_path = os.path.realpath(args.output_path) + '.log'
@@ -233,6 +263,43 @@ def _quantize(args):
_utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+def _write_qparam(args):
+ # get file path to log
+ dir_path = os.path.dirname(os.path.realpath(__file__))
+ logfile_path = os.path.realpath(args.output_path) + '.log'
+
+ with open(logfile_path, 'wb') as f:
+ # get driver path
+ circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
+
+ # make a command to write qparams to the tensors
+ circle_quantizer_cmd = [circle_quantizer_path]
+ # verbose
+ if _utils._is_valid_attr(args, 'verbose'):
+ circle_quantizer_cmd.append('--verbose')
+ if _utils._is_valid_attr(args, 'tensor_name'):
+ tensor_name = getattr(args, 'tensor_name')
+ if _utils._is_valid_attr(args, 'scale'):
+ scale = getattr(args, 'scale')
+ if _utils._is_valid_attr(args, 'zero_point'):
+ zero_point = getattr(args, 'zero_point')
+ for (t, s, zp) in zip(tensor_name, scale, zero_point):
+ circle_quantizer_cmd.append('--force_quantparam')
+ circle_quantizer_cmd.append(t)
+ circle_quantizer_cmd.append(str(s))
+ circle_quantizer_cmd.append(str(zp))
+ # input and output path
+ if _utils._is_valid_attr(args, 'input_path'):
+ circle_quantizer_cmd.append(getattr(args, 'input_path'))
+ if _utils._is_valid_attr(args, 'output_path'):
+ circle_quantizer_cmd.append(getattr(args, 'output_path'))
+
+ f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
+
+ # run circle-quantizer
+ _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+
+
def main():
# parse arguments
parser = _get_parser()
diff --git a/compiler/one-cmds/tests/one-import_neg_002.test b/compiler/one-cmds/tests/one-import_neg_002.test
index 738c2cba9..9cf0b1401 100644
--- a/compiler/one-cmds/tests/one-import_neg_002.test
+++ b/compiler/one-cmds/tests/one-import_neg_002.test
@@ -21,10 +21,16 @@ filename="${filename_ext%.*}"
trap_err_onexit()
{
+ # TF2.3.0
if grep -q "is incompatible with result type" "${filename}.log"; then
echo "${filename_ext} SUCCESS"
exit 0
fi
+ # TF2.6.0
+ if grep -q "is incompatible with body result type" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
echo "${filename_ext} FAILED"
exit 255
diff --git a/compiler/one-cmds/tests/one-import_neg_006.test b/compiler/one-cmds/tests/one-import_neg_006.test
index 7c63ee3e4..3fb5c7df1 100644
--- a/compiler/one-cmds/tests/one-import_neg_006.test
+++ b/compiler/one-cmds/tests/one-import_neg_006.test
@@ -45,5 +45,8 @@ one-import tf \
--input_arrays input --input_shapes "0,299,299,3" \
--output_arrays InceptionV3/Predictions/Reshape_1 > ${filename}.log 2>&1
-echo "${filename_ext} FAILED"
-exit 255
+# NOTE TF2.3.0 fails (which is expected) but TF2.5 (possibly 2.4) and above does not
+# See https://github.com/tensorflow/tensorflow/issues/51756 for details
+# TODO exit 255
+echo "${filename_ext} SKIPPED"
+exit 0
diff --git a/compiler/one-cmds/tests/one-quantize_005.test b/compiler/one-cmds/tests/one-quantize_005.test
new file mode 100644
index 000000000..8449df6ae
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_005.test
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.mat.q8.circle"
+outputfile="./inception_v3.one-quantize_005.q8.circle"
+
+rm -rf ${outputfile}
+
+# run test with force_quantparam option
+one-quantize \
+--force_quantparam \
+--tensor_name input \
+--scale 2.3 \
+--zero_point 33 \
+--input_path ${inputfile} \
+--output_path ${outputfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_006.test b/compiler/one-cmds/tests/one-quantize_006.test
new file mode 100644
index 000000000..92b9ebebb
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_006.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.mat.q8.circle"
+outputfile="./inception_v3.one-quantize_006.q8.circle"
+
+rm -rf ${outputfile}
+
+# run test with force_quantparam option (multi tensors)
+one-quantize \
+--force_quantparam \
+--tensor_name input \
+--scale 2.3 \
+--zero_point 33 \
+--tensor_name InceptionV3/Predictions/Reshape_1 \
+--scale 2.3 \
+--zero_point 33 \
+--input_path ${inputfile} \
+--output_path ${outputfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_neg_018.test b/compiler/one-cmds/tests/one-quantize_neg_018.test
new file mode 100644
index 000000000..6937caf4d
--- /dev/null
+++ b/compiler/one-cmds/tests/one-quantize_neg_018.test
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage: --force_quantparam given without --zero_point
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ if grep -q "following arguments are required: --zero_point" "${filename}.log"; then
+ echo "${filename_ext} SUCCESS"
+ exit 0
+ fi
+
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.mat.q8.circle"
+outputfile="./inception_v3.neg_018.q8.circle"
+
+rm -rf ${outputfile}.log
+
+# run test
+one-quantize \
+--force_quantparam \
+--tensor_name input \
+--scale 2.3 \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_022.cfg b/compiler/one-cmds/tests/onecc_022.cfg
new file mode 100644
index 000000000..9741d5173
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_022.cfg
@@ -0,0 +1,18 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=False
+one-optimize=False
+one-quantize=True
+one-pack=False
+one-codegen=False
+one-profile=False
+
+[one-quantize]
+input_path=inception_v3.mat.q8.circle
+output_path=inception_v3.onecc_022.q8.circle
+force_quantparam=True
+tensor_name=input
+scale=2.1
+zero_point=45
diff --git a/compiler/one-cmds/tests/onecc_022.test b/compiler/one-cmds/tests/onecc_022.test
new file mode 100644
index 000000000..3aaa26fea
--- /dev/null
+++ b/compiler/one-cmds/tests/onecc_022.test
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+ echo "${filename_ext} FAILED"
+ exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_022.cfg"
+outputfile="inception_v3.onecc_022.q8.circle"
+
+rm -rf ${outputfile}
+
+# run test
+onecc -C ${configfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+ trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/prepare_test_materials.sh b/compiler/one-cmds/tests/prepare_test_materials.sh
index 694651d74..7f269530c 100644
--- a/compiler/one-cmds/tests/prepare_test_materials.sh
+++ b/compiler/one-cmds/tests/prepare_test_materials.sh
@@ -103,4 +103,14 @@ if [[ ! -s ${outputfile} ]]; then
--output_arrays InceptionV3/Predictions/Reshape_1
fi
+# prepare 'inception_v3.mat.q8.circle' file used for quantization test
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.mat.q8.circle"
+
+if [[ ! -s ${outputfile} ]]; then
+ ../bin/one-quantize \
+ --input_path ${inputfile} \
+ --output_path ${outputfile}
+fi
+
popd > /dev/null
diff --git a/compiler/one-cmds/utils.py b/compiler/one-cmds/utils.py
index f18dc6f56..efb01a210 100644
--- a/compiler/one-cmds/utils.py
+++ b/compiler/one-cmds/utils.py
@@ -29,6 +29,7 @@ class _CONSTANT:
('convert_nchw_to_nhwc',
'Experimental: This will convert NCHW operators to NHWC under the assumption that input model is NCHW.'
),
+ ('expand_broadcast_const', 'expand broadcastable constant node inputs'),
('nchw_to_nhwc_input_shape',
'convert the input shape of the model (argument for convert_nchw_to_nhwc)'),
('nchw_to_nhwc_output_shape',
@@ -36,9 +37,11 @@ class _CONSTANT:
('fold_add_v2', 'fold AddV2 op with constant inputs'),
('fold_cast', 'fold Cast op with constant input'),
('fold_dequantize', 'fold Dequantize op'),
+ ('fold_dwconv', 'fold Depthwise Convolution op with constant inputs'),
('fold_sparse_to_dense', 'fold SparseToDense op'),
('forward_reshape_to_unaryop', 'Forward Reshape op'),
('fuse_add_with_tconv', 'fuse Add op to Transposed'),
+ ('fuse_add_with_fully_connected', 'fuse Add op to FullyConnected op'),
('fuse_batchnorm_with_conv', 'fuse BatchNorm op to Convolution op'),
('fuse_batchnorm_with_dwconv', 'fuse BatchNorm op to Depthwise Convolution op'),
('fuse_batchnorm_with_tconv', 'fuse BatchNorm op to Transposed Convolution op'),
@@ -74,6 +77,8 @@ class _CONSTANT:
'convert weight format of FullyConnected op to SHUFFLED16x1FLOAT32.'
' Note that it only converts weights whose row is a multiple of 16'),
('substitute_pack_to_reshape', 'convert single input Pack op to Reshape op'),
+ ('substitute_padv2_to_pad', 'convert certain condition PadV2 to Pad'),
+ ('substitute_splitv_to_split', 'convert certain condition SplitV to Split'),
('substitute_squeeze_to_reshape', 'convert certain condition Squeeze to Reshape'),
('substitute_strided_slice_to_reshape',
'convert certain condition StridedSlice to Reshape'),
@@ -107,6 +112,14 @@ def _add_default_arg(parser):
parser.add_argument('-S', '--section', type=str, help=argparse.SUPPRESS)
+def is_accumulated_arg(arg, driver):
+ if driver == "one-quantize":
+ if arg == "tensor_name" or arg == "scale" or arg == "zero_point":
+ return True
+
+ return False
+
+
def _is_valid_attr(args, attr):
return hasattr(args, attr) and getattr(args, attr)
@@ -124,6 +137,12 @@ def _parse_cfg(args, driver_name):
raise AssertionError('configuration file must have \'' + driver_name +
'\' section')
for key in config[args.section]:
+ if is_accumulated_arg(key, driver_name):
+ if not _is_valid_attr(args, key):
+ setattr(args, key, [config[args.section][key]])
+ else:
+ getattr(args, key).append(config[args.section][key])
+ continue
if not _is_valid_attr(args, key):
setattr(args, key, config[args.section][key])
# if section is not given, section name is same with its driver name
@@ -133,6 +152,12 @@ def _parse_cfg(args, driver_name):
'\' section')
secton_to_run = driver_name
for key in config[secton_to_run]:
+ if is_accumulated_arg(key, driver_name):
+ if not _is_valid_attr(args, key):
+ setattr(args, key, [config[secton_to_run][key]])
+ else:
+ getattr(args, key).append(config[secton_to_run][key])
+ continue
if not _is_valid_attr(args, key):
setattr(args, key, config[secton_to_run][key])
@@ -242,33 +267,26 @@ def _run(cmd, err_prefix=None, logfile=None):
err_prefix: prefix to be put before every stderr lines
logfile: file stream to which both of stdout and stderr lines will be written
"""
- if logfile == None:
- with subprocess.Popen(cmd, stderr=subprocess.PIPE, bufsize=1) as p:
- for line in p.stderr:
- if err_prefix:
- line = f"{err_prefix}: ".encode() + line
- sys.stderr.buffer.write(line)
- sys.stderr.buffer.flush()
- else:
- with subprocess.Popen(
- cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=1) as p:
- import select
- inputs = set([p.stdout, p.stderr])
- while inputs:
- readable, _, _ = select.select(inputs, [], [])
- for x in readable:
- line = x.readline()
- if len(line) == 0:
- inputs.discard(x)
- continue
- if x == p.stdout:
- out = sys.stdout
- if x == p.stderr:
- out = sys.stderr
- if err_prefix:
- line = f"{err_prefix}: ".encode() + line
- out.buffer.write(line)
- out.buffer.flush()
+ with subprocess.Popen(
+ cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=1) as p:
+ import select
+ inputs = set([p.stdout, p.stderr])
+ while inputs:
+ readable, _, _ = select.select(inputs, [], [])
+ for x in readable:
+ line = x.readline()
+ if len(line) == 0:
+ inputs.discard(x)
+ continue
+ if x == p.stdout:
+ out = sys.stdout
+ if x == p.stderr:
+ out = sys.stderr
+ if err_prefix:
+ line = f"{err_prefix}: ".encode() + line
+ out.buffer.write(line)
+ out.buffer.flush()
+ if logfile is not None:
logfile.write(line)
if p.returncode != 0:
sys.exit(p.returncode)
diff --git a/compiler/pota-quantization-value-test/CMakeLists.txt b/compiler/pota-quantization-value-test/CMakeLists.txt
index 80661e566..00ffb57de 100644
--- a/compiler/pota-quantization-value-test/CMakeLists.txt
+++ b/compiler/pota-quantization-value-test/CMakeLists.txt
@@ -1,7 +1,7 @@
unset(QUANTIZATION_VALUE_TEST)
unset(QUANTIZATION_VALUE_TEST_WITH_PARAM)
-nnas_find_package(FlatBuffers QUIET)
+nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
if(NOT FlatBuffers_FOUND)
message(STATUS "Build pota-quantization-value-test: FAILED (missing FlatBuffers)")
return()
@@ -25,7 +25,7 @@ get_target_property(SCHEMA_BIN_PATH mio_circle BINARY_DIR)
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/gen_h5_explicit_inputs.py"
"${CMAKE_CURRENT_BINARY_DIR}/gen_h5_explicit_inputs.py" COPYONLY)
-set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_3_0")
+set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_6_0")
###
### Generate test.config
diff --git a/compiler/pota-quantization-value-test/requires.cmake b/compiler/pota-quantization-value-test/requires.cmake
index 883a925df..4eb7204e1 100644
--- a/compiler/pota-quantization-value-test/requires.cmake
+++ b/compiler/pota-quantization-value-test/requires.cmake
@@ -2,3 +2,4 @@ require("record-minmax")
require("circle-quantizer")
require("circle-tensordump")
require("common-artifacts")
+require("mio-circle")
diff --git a/compiler/tfl-inspect/CMakeLists.txt b/compiler/tfl-inspect/CMakeLists.txt
index ba019865f..6ba55c357 100644
--- a/compiler/tfl-inspect/CMakeLists.txt
+++ b/compiler/tfl-inspect/CMakeLists.txt
@@ -10,5 +10,5 @@ add_executable(tfl-inspect ${DRIVER} ${SOURCES})
target_include_directories(tfl-inspect PRIVATE src)
target_link_libraries(tfl-inspect arser)
target_link_libraries(tfl-inspect foder)
-target_link_libraries(tfl-inspect mio_tflite)
+target_link_libraries(tfl-inspect mio_tflite260)
target_link_libraries(tfl-inspect safemain)
diff --git a/compiler/tfl-inspect/requires.cmake b/compiler/tfl-inspect/requires.cmake
index 25857ad2b..9a7477b81 100644
--- a/compiler/tfl-inspect/requires.cmake
+++ b/compiler/tfl-inspect/requires.cmake
@@ -1,4 +1,4 @@
require("arser")
require("foder")
-require("mio-tflite")
+require("mio-tflite260")
require("safemain")
diff --git a/compiler/tfl-inspect/src/Reader.cpp b/compiler/tfl-inspect/src/Reader.cpp
index 5be289446..41a8396bb 100644
--- a/compiler/tfl-inspect/src/Reader.cpp
+++ b/compiler/tfl-inspect/src/Reader.cpp
@@ -16,21 +16,34 @@
#include "Reader.h"
+#include <cassert>
#include <sstream>
#include <string>
namespace tflinspect
{
+// This will provide v3/v3a format neutral BuiltinOperator
+tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode)
+{
+ assert(opcode != nullptr);
+ int8_t dp_code = opcode->deprecated_builtin_code();
+ // 127 is max of int8_t which is upper bound of v3 builtin_code
+ // NOTE TensorFlow uses 'BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES' for 127
+ if (dp_code < 127 && dp_code >= 0)
+ return tflite::BuiltinOperator(dp_code);
+ return opcode->builtin_code();
+}
+
bool is_valid(const tflite::OperatorCode *opcode)
{
- tflite::BuiltinOperator code = opcode->builtin_code();
+ tflite::BuiltinOperator code = builtin_code_neutral(opcode);
return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX);
}
bool is_custom(const tflite::OperatorCode *opcode)
{
- tflite::BuiltinOperator code = opcode->builtin_code();
+ tflite::BuiltinOperator code = builtin_code_neutral(opcode);
return (code == tflite::BuiltinOperator_CUSTOM);
}
@@ -56,7 +69,7 @@ std::string opcode_name(const tflite::OperatorCode *opcode)
return custom_op;
}
- tflite::BuiltinOperator code = opcode->builtin_code();
+ tflite::BuiltinOperator code = builtin_code_neutral(opcode);
return tflite::EnumNameBuiltinOperator(code);
}
@@ -122,7 +135,7 @@ tflite::BuiltinOperator Reader::builtin_code(const tflite::Operator *op) const
assert(index < _op_codes.size());
const tflite::OperatorCode *opcode = _op_codes.at(index);
- return opcode->builtin_code();
+ return tflinspect::builtin_code_neutral(opcode);
}
std::string Reader::opcode_name(const tflite::Operator *op) const
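The same v3/v3a resolution rule is introduced in several readers below (tflchef, tfldump, tflite2circle). A standalone sketch of that rule, using plain integers instead of the generated tflite::OperatorCode type, for illustration only:

#include <cstdint>

// If the deprecated (schema v3) int8 code is below the 127 placeholder,
// it is authoritative; otherwise fall back to the extended (v3a) field.
// 127 corresponds to BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
int32_t resolve_builtin_code(int8_t deprecated_code, int32_t extended_code)
{
  if (deprecated_code >= 0 && deprecated_code < 127)
    return deprecated_code;
  return extended_code;
}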
diff --git a/compiler/tfl-inspect/src/Reader.h b/compiler/tfl-inspect/src/Reader.h
index e9e182a4b..91b7bb940 100644
--- a/compiler/tfl-inspect/src/Reader.h
+++ b/compiler/tfl-inspect/src/Reader.h
@@ -36,6 +36,7 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T
return ret;
}
+tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode);
bool is_valid(const tflite::OperatorCode *opcode);
bool is_custom(const tflite::OperatorCode *opcode);
std::string opcode_name(const tflite::OperatorCode *opcode);
diff --git a/compiler/tfl-verify/CMakeLists.txt b/compiler/tfl-verify/CMakeLists.txt
index 4421a4660..a87d30c5e 100644
--- a/compiler/tfl-verify/CMakeLists.txt
+++ b/compiler/tfl-verify/CMakeLists.txt
@@ -8,6 +8,6 @@ add_executable(tfl-verify ${SOURCES})
target_include_directories(tfl-verify PRIVATE src)
target_link_libraries(tfl-verify arser)
target_link_libraries(tfl-verify foder)
-target_link_libraries(tfl-verify mio_tflite)
+target_link_libraries(tfl-verify mio_tflite260)
target_link_libraries(tfl-verify safemain)
target_link_libraries(tfl-verify cwrap)
diff --git a/compiler/tfl-verify/requires.cmake b/compiler/tfl-verify/requires.cmake
index 79503f325..72803d890 100644
--- a/compiler/tfl-verify/requires.cmake
+++ b/compiler/tfl-verify/requires.cmake
@@ -1,5 +1,5 @@
require("arser")
require("foder")
-require("mio-tflite")
+require("mio-tflite260")
require("safemain")
require("cwrap")
diff --git a/compiler/tflchef/CMakeLists.txt b/compiler/tflchef/CMakeLists.txt
index ebc873342..ac7fe4b7c 100644
--- a/compiler/tflchef/CMakeLists.txt
+++ b/compiler/tflchef/CMakeLists.txt
@@ -5,10 +5,10 @@ if(NOT Protobuf_FOUND)
return()
endif(NOT Protobuf_FOUND)
-if(NOT TARGET mio_tflite)
- message(STATUS "Build tflchef: FAILED (missing mio_tflite)")
+if(NOT TARGET mio_tflite260)
+ message(STATUS "Build tflchef: FAILED (missing mio_tflite260)")
return()
-endif(NOT TARGET mio_tflite)
+endif(NOT TARGET mio_tflite260)
# Recipe Parser
add_subdirectory(proto)
diff --git a/compiler/tflchef/core/CMakeLists.txt b/compiler/tflchef/core/CMakeLists.txt
index 43f6b8b03..413b78b15 100644
--- a/compiler/tflchef/core/CMakeLists.txt
+++ b/compiler/tflchef/core/CMakeLists.txt
@@ -5,5 +5,5 @@ target_include_directories(tflchef_core PUBLIC include)
target_include_directories(tflchef_core PRIVATE src)
target_link_libraries(tflchef_core tflchef_proto)
target_link_libraries(tflchef_core tflchef_log)
-target_link_libraries(tflchef_core mio_tflite)
+target_link_libraries(tflchef_core mio_tflite260)
target_link_libraries(tflchef_core souschef)
diff --git a/compiler/tflchef/core/src/CustomOp/AddV2.cpp b/compiler/tflchef/core/src/CustomOp/AddV2.cpp
index dffd336cd..557c20bce 100644
--- a/compiler/tflchef/core/src/CustomOp/AddV2.cpp
+++ b/compiler/tflchef/core/src/CustomOp/AddV2.cpp
@@ -17,7 +17,7 @@
#include "AddV2.h"
-#include "flatbuffers/flexbuffers.h"
+#include <flatbuffers/flexbuffers.h>
flatbuffers::Offset<void> AddV2Chef::value(flatbuffers::FlatBufferBuilder &fbb) const
{
diff --git a/compiler/tflchef/core/src/CustomOp/All.cpp b/compiler/tflchef/core/src/CustomOp/All.cpp
index b3ae821a4..bbef5ecaa 100644
--- a/compiler/tflchef/core/src/CustomOp/All.cpp
+++ b/compiler/tflchef/core/src/CustomOp/All.cpp
@@ -17,7 +17,7 @@
#include "All.h"
-#include "flatbuffers/flexbuffers.h"
+#include <flatbuffers/flexbuffers.h>
flatbuffers::Offset<void> AllChef::value(flatbuffers::FlatBufferBuilder &fbb) const
{
diff --git a/compiler/tflchef/core/src/CustomOp/BatchMatMulV2.cpp b/compiler/tflchef/core/src/CustomOp/BatchMatMulV2.cpp
index 595f3b9bb..6d2c5b13b 100644
--- a/compiler/tflchef/core/src/CustomOp/BatchMatMulV2.cpp
+++ b/compiler/tflchef/core/src/CustomOp/BatchMatMulV2.cpp
@@ -17,7 +17,7 @@
#include "BatchMatMulV2.h"
-#include "flatbuffers/flexbuffers.h"
+#include <flatbuffers/flexbuffers.h>
flatbuffers::Offset<void> BatchMatMulV2Chef::value(flatbuffers::FlatBufferBuilder &fbb) const
{
diff --git a/compiler/tflchef/core/src/CustomOp/BroadcastTo.cpp b/compiler/tflchef/core/src/CustomOp/BroadcastTo.cpp
index fc429e2f7..dd458b376 100644
--- a/compiler/tflchef/core/src/CustomOp/BroadcastTo.cpp
+++ b/compiler/tflchef/core/src/CustomOp/BroadcastTo.cpp
@@ -17,7 +17,7 @@
#include "BroadcastTo.h"
-#include "flatbuffers/flexbuffers.h"
+#include <flatbuffers/flexbuffers.h>
flatbuffers::Offset<void> BroadcastToChef::value(flatbuffers::FlatBufferBuilder &fbb) const
{
diff --git a/compiler/tflchef/core/src/CustomOp/MatMul.cpp b/compiler/tflchef/core/src/CustomOp/MatMul.cpp
index ba34aa8db..e7c707d37 100644
--- a/compiler/tflchef/core/src/CustomOp/MatMul.cpp
+++ b/compiler/tflchef/core/src/CustomOp/MatMul.cpp
@@ -17,7 +17,7 @@
#include "MatMul.h"
-#include "flatbuffers/flexbuffers.h"
+#include <flatbuffers/flexbuffers.h>
flatbuffers::Offset<void> MatMulChef::value(flatbuffers::FlatBufferBuilder &fbb) const
{
diff --git a/compiler/tflchef/core/src/CustomOp/MatrixBandPart.cpp b/compiler/tflchef/core/src/CustomOp/MatrixBandPart.cpp
index d12597edb..b25003227 100644
--- a/compiler/tflchef/core/src/CustomOp/MatrixBandPart.cpp
+++ b/compiler/tflchef/core/src/CustomOp/MatrixBandPart.cpp
@@ -17,7 +17,7 @@
#include "MatrixBandPart.h"
-#include "flatbuffers/flexbuffers.h"
+#include <flatbuffers/flexbuffers.h>
flatbuffers::Offset<void> MatrixBandPartChef::value(flatbuffers::FlatBufferBuilder &fbb) const
{
diff --git a/compiler/tflchef/core/src/CustomOp/MaxPoolWithArgmax.cpp b/compiler/tflchef/core/src/CustomOp/MaxPoolWithArgmax.cpp
index 9dacf7bf6..290d3c2ca 100644
--- a/compiler/tflchef/core/src/CustomOp/MaxPoolWithArgmax.cpp
+++ b/compiler/tflchef/core/src/CustomOp/MaxPoolWithArgmax.cpp
@@ -17,7 +17,7 @@
#include "MaxPoolWithArgmax.h"
-#include "flatbuffers/flexbuffers.h"
+#include <flatbuffers/flexbuffers.h>
flatbuffers::Offset<void> MaxPoolWithArgmaxChef::value(flatbuffers::FlatBufferBuilder &fbb) const
{
diff --git a/compiler/tflchef/core/src/ModelChef.cpp b/compiler/tflchef/core/src/ModelChef.cpp
index aba20dcbf..7028bd9ac 100644
--- a/compiler/tflchef/core/src/ModelChef.cpp
+++ b/compiler/tflchef/core/src/ModelChef.cpp
@@ -582,8 +582,11 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe)
for (auto const &opcode : builtin_code_map)
{
tflite::OperatorCodeBuilder code_builder{*flatbuffer_builder};
- code_builder.add_builtin_code(opcode.first);
+ // TODO support for opcode.first >= 127
+ assert(opcode.first < 127);
+ code_builder.add_deprecated_builtin_code(opcode.first);
code_builder.add_version(opcode.second);
+ code_builder.add_builtin_code(opcode.first);
auto code = code_builder.Finish();
// Update OperatorCode vector
code_vec.emplace_back(code);
@@ -597,8 +600,9 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe)
{
auto custom_code = flatbuffer_builder->CreateString(opcode);
tflite::OperatorCodeBuilder code_builder{*flatbuffer_builder};
- code_builder.add_builtin_code(tflite::BuiltinOperator_CUSTOM);
+ code_builder.add_deprecated_builtin_code(tflite::BuiltinOperator_CUSTOM);
code_builder.add_custom_code(custom_code);
+ code_builder.add_builtin_code(tflite::BuiltinOperator_CUSTOM);
auto code = code_builder.Finish();
// Update OperatorCode vector
code_vec.emplace_back(code);
diff --git a/compiler/tflchef/requires.cmake b/compiler/tflchef/requires.cmake
index 4c02174b5..78bfa2d07 100644
--- a/compiler/tflchef/requires.cmake
+++ b/compiler/tflchef/requires.cmake
@@ -1,7 +1,7 @@
require("arser")
require("nnkit")
require("cwrap")
-require("mio-tflite")
+require("mio-tflite260")
require("safemain")
require("hermes")
require("hermes-std")
diff --git a/compiler/tflchef/tflite/CMakeLists.txt b/compiler/tflchef/tflite/CMakeLists.txt
index ce8b8c463..3c4c3fff6 100644
--- a/compiler/tflchef/tflite/CMakeLists.txt
+++ b/compiler/tflchef/tflite/CMakeLists.txt
@@ -4,6 +4,6 @@ add_library(tflchef_tflite STATIC ${SOURCES})
target_include_directories(tflchef_tflite PUBLIC include)
target_include_directories(tflchef_tflite PRIVATE src)
target_link_libraries(tflchef_tflite tflchef_proto)
-target_link_libraries(tflchef_tflite mio_tflite)
+target_link_libraries(tflchef_tflite mio_tflite260)
target_link_libraries(tflchef_tflite cwrap)
target_link_libraries(tflchef_tflite souschef)
diff --git a/compiler/tflchef/tflite/src/TFliteImport.cpp b/compiler/tflchef/tflite/src/TFliteImport.cpp
index 51d9b5ffa..1462ee7f4 100644
--- a/compiler/tflchef/tflite/src/TFliteImport.cpp
+++ b/compiler/tflchef/tflite/src/TFliteImport.cpp
@@ -38,15 +38,27 @@ const char *tensor_name(const tflite::Tensor *tensor)
return kEmptyTensorName;
}
+// This will provide v3/v3a format neutral BuiltinOperator
+tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode)
+{
+ assert(opcode != nullptr);
+ int8_t dp_code = opcode->deprecated_builtin_code();
+ // 127 is max of int8_t which is upper bound of v3 builtin_code
+ // NOTE TensorFlow uses 'BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES' for 127
+ if (dp_code < 127 && dp_code >= 0)
+ return tflite::BuiltinOperator(dp_code);
+ return opcode->builtin_code();
+}
+
bool is_valid(const tflite::OperatorCode *opcode)
{
- tflite::BuiltinOperator code = opcode->builtin_code();
+ tflite::BuiltinOperator code = builtin_code_neutral(opcode);
return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX);
}
bool is_custom(const tflite::OperatorCode *opcode)
{
- tflite::BuiltinOperator code = opcode->builtin_code();
+ tflite::BuiltinOperator code = builtin_code_neutral(opcode);
return (code == tflite::BuiltinOperator_CUSTOM);
}
@@ -92,7 +104,7 @@ tflite::BuiltinOperator TFliteImport::builtin_code(const tflite::Operator *op) c
assert(index < _op_codes.size());
const tflite::OperatorCode *opcode = _op_codes.at(index);
- return opcode->builtin_code();
+ return builtin_code_neutral(opcode);
}
std::string TFliteImport::opcode_name(const tflite::Operator *op) const
@@ -116,7 +128,7 @@ std::string TFliteImport::opcode_name(const tflite::Operator *op) const
return opcode->custom_code()->c_str();
}
- tflite::BuiltinOperator code = opcode->builtin_code();
+ tflite::BuiltinOperator code = builtin_code_neutral(opcode);
return EnumNameBuiltinOperator(code);
}
diff --git a/compiler/tflchef/tflite/src/TFliteImport.h b/compiler/tflchef/tflite/src/TFliteImport.h
index 9d0a642ab..43b5bbaff 100644
--- a/compiler/tflchef/tflite/src/TFliteImport.h
+++ b/compiler/tflchef/tflite/src/TFliteImport.h
@@ -36,6 +36,7 @@ using TFliteOperators_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Operat
const char *tensor_type(const tflite::Tensor *tensor);
const char *tensor_name(const tflite::Tensor *tensor);
+tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode);
bool is_valid(const tflite::OperatorCode *opcode);
bool is_custom(const tflite::OperatorCode *opcode);
diff --git a/compiler/tfldump/CMakeLists.txt b/compiler/tfldump/CMakeLists.txt
index e6afcb6d2..83f7febad 100644
--- a/compiler/tfldump/CMakeLists.txt
+++ b/compiler/tfldump/CMakeLists.txt
@@ -1,7 +1,7 @@
-if(NOT TARGET mio_tflite)
- message(STATUS "Build tfldump: FAILED (missing mio_tflite)")
+if(NOT TARGET mio_tflite260)
+ message(STATUS "Build tfldump: FAILED (missing mio_tflite260)")
return()
-endif(NOT TARGET mio_tflite)
+endif(NOT TARGET mio_tflite260)
set(DRIVER "driver/Driver.cpp")
@@ -10,6 +10,6 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
add_executable(tfldump ${DRIVER} ${SOURCES})
target_include_directories(tfldump PRIVATE include)
target_link_libraries(tfldump arser)
-target_link_libraries(tfldump mio_tflite)
+target_link_libraries(tfldump mio_tflite260)
target_link_libraries(tfldump safemain)
-target_link_libraries(tfldump flatbuffers)
+target_link_libraries(tfldump flatbuffers-1.12)
diff --git a/compiler/tfldump/requires.cmake b/compiler/tfldump/requires.cmake
index 2cdd3a391..d0f9cccba 100644
--- a/compiler/tfldump/requires.cmake
+++ b/compiler/tfldump/requires.cmake
@@ -1,3 +1,3 @@
require("arser")
-require("mio-tflite")
+require("mio-tflite260")
require("safemain")
diff --git a/compiler/tfldump/src/Dump.cpp b/compiler/tfldump/src/Dump.cpp
index 20e1343e6..7a480bc52 100644
--- a/compiler/tfldump/src/Dump.cpp
+++ b/compiler/tfldump/src/Dump.cpp
@@ -350,6 +350,7 @@ void dump_model(std::ostream &os, const tflite::Model *model)
auto opcodes = reader.opcodes();
auto buffers = reader.buffers();
auto metadata = reader.metadata();
+ auto signaturedefs = reader.signaturedefs();
// dump operator_codes
os << "Operator Codes: [order] OpCodeName (OpCode Enum)" << std::endl;
@@ -357,11 +358,13 @@ void dump_model(std::ostream &os, const tflite::Model *model)
for (auto opcode : opcodes)
{
tflite::BuiltinOperator op_code = opcode->builtin_code();
+ tflite::BuiltinOperator dp_code = tflite::BuiltinOperator(opcode->deprecated_builtin_code());
+
auto op_name = tflread::opcode_name(opcode);
auto op_version = opcode->version();
os << "[" << opcode_index << "] " << op_name << " (code: " << op_code
- << ", version: " << op_version << ")" << std::endl;
+ << ", dep_code: " << dp_code << ", version: " << op_version << ")" << std::endl;
opcode_index++;
}
@@ -389,7 +392,38 @@ void dump_model(std::ostream &os, const tflite::Model *model)
os << "metadata : B(index) name" << std::endl;
for (uint32_t i = 0; i < metadata->Length(); ++i)
{
- os << "B(" << metadata->Get(i)->buffer() << ") " << metadata->Get(i)->name()->c_str();
+ os << "B(" << metadata->Get(i)->buffer() << ") " << metadata->Get(i)->name()->c_str()
+ << std::endl;
+ }
+ os << std::endl;
+ }
+
+ // dump signaturedef
+ if (signaturedefs != nullptr)
+ {
+ os << "SignatureDef" << std::endl;
+ for (uint32_t i = 0; i < signaturedefs->Length(); ++i)
+ {
+ auto sign_i = signaturedefs->Get(i);
+ os << "S(" << i << ") " << sign_i->method_name()->c_str() << ", key("
+ << sign_i->key()->c_str() << "), sub_graph(" << sign_i->subgraph_index() << ")"
+ << std::endl;
+
+ auto inputs_i = sign_i->inputs();
+ for (uint32_t t = 0; t < inputs_i->Length(); ++t)
+ {
+ auto inputs_i_t = inputs_i->Get(t);
+ os << " I T(" << t << ") " << inputs_i_t->name()->c_str() << ": "
+ << inputs_i_t->tensor_index() << std::endl;
+ }
+
+ auto outputs_i = sign_i->outputs();
+ for (uint32_t t = 0; t < outputs_i->Length(); ++t)
+ {
+ auto outputs_i_t = outputs_i->Get(t);
+ os << " O T(" << t << ") " << outputs_i_t->name()->c_str() << ": "
+ << outputs_i_t->tensor_index() << std::endl;
+ }
}
os << std::endl;
}
diff --git a/compiler/tfldump/src/Read.cpp b/compiler/tfldump/src/Read.cpp
index 856cc5699..8b3a96e83 100644
--- a/compiler/tfldump/src/Read.cpp
+++ b/compiler/tfldump/src/Read.cpp
@@ -22,15 +22,25 @@
namespace tflread
{
+// This will provide v3/v3a format neutral BuiltinOperator
+tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode)
+{
+ assert(opcode != nullptr);
+ int8_t dp_code = opcode->deprecated_builtin_code();
+ if (dp_code < 127 && dp_code >= 0)
+ return tflite::BuiltinOperator(dp_code);
+ return opcode->builtin_code();
+}
+
bool is_valid(const tflite::OperatorCode *opcode)
{
- tflite::BuiltinOperator code = opcode->builtin_code();
+ tflite::BuiltinOperator code = builtin_code_neutral(opcode);
return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX);
}
bool is_custom(const tflite::OperatorCode *opcode)
{
- tflite::BuiltinOperator code = opcode->builtin_code();
+ tflite::BuiltinOperator code = builtin_code_neutral(opcode);
return (code == tflite::BuiltinOperator_CUSTOM);
}
@@ -56,7 +66,7 @@ std::string opcode_name(const tflite::OperatorCode *opcode)
return custom_op;
}
- tflite::BuiltinOperator code = opcode->builtin_code();
+ tflite::BuiltinOperator code = builtin_code_neutral(opcode);
return tflite::EnumNameBuiltinOperator(code);
}
@@ -82,6 +92,7 @@ Reader::Reader(const tflite::Model *model)
_subgraphs = model->subgraphs();
_buffers = model->buffers();
_metadata = model->metadata();
+ _signaturedefs = model->signature_defs();
auto opcodes = model->operator_codes();
for (const ::tflite::OperatorCode *opcode : *opcodes)
@@ -118,7 +129,7 @@ tflite::BuiltinOperator Reader::builtin_code(const tflite::Operator *op) const
assert(index < _op_codes.size());
const tflite::OperatorCode *opcode = _op_codes.at(index);
- return opcode->builtin_code();
+ return tflread::builtin_code_neutral(opcode);
}
std::string Reader::opcode_name(const tflite::Operator *op) const
diff --git a/compiler/tfldump/src/Read.h b/compiler/tfldump/src/Read.h
index f835be140..80f317d0b 100644
--- a/compiler/tfldump/src/Read.h
+++ b/compiler/tfldump/src/Read.h
@@ -36,6 +36,7 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T
return ret;
}
+tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode);
bool is_valid(const tflite::OperatorCode *opcode);
bool is_custom(const tflite::OperatorCode *opcode);
std::string opcode_name(const tflite::OperatorCode *opcode);
@@ -53,6 +54,7 @@ private:
using TFliteTensors_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Tensor>>;
using TFliteOperators_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Operator>>;
using TFliteMetadata_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Metadata>>;
+ using TFliteSignatureDef_t = flatbuffers::Vector<flatbuffers::Offset<tflite::SignatureDef>>;
public:
Reader(const tflite::Model *model);
@@ -69,6 +71,7 @@ public:
const std::vector<int32_t> &inputs() const { return _inputs; }
const std::vector<int32_t> &outputs() const { return _outputs; }
const TFliteMetadata_t *metadata() const { return _metadata; }
+ const TFliteSignatureDef_t *signaturedefs() const { return _signaturedefs; }
uint32_t num_subgraph() const { return _subgraphs->Length(); }
@@ -89,6 +92,7 @@ private:
const TFliteTensors_t *_tensors{nullptr};
const TFliteOperators_t *_operators{nullptr};
const TFliteMetadata_t *_metadata{nullptr};
+ const TFliteSignatureDef_t *_signaturedefs{nullptr};
uint32_t _subgraph_index;
std::string _subgraph_name;
diff --git a/compiler/tflite2circle/CMakeLists.txt b/compiler/tflite2circle/CMakeLists.txt
index 3e46dd803..4ea01ad31 100644
--- a/compiler/tflite2circle/CMakeLists.txt
+++ b/compiler/tflite2circle/CMakeLists.txt
@@ -1,7 +1,7 @@
nnas_include(TargetRequire)
unset(REQUIRED_TARGETS)
-list(APPEND REQUIRED_TARGETS mio_tflite)
+list(APPEND REQUIRED_TARGETS mio_tflite260)
list(APPEND REQUIRED_TARGETS mio_circle)
TargetRequire_Return(${REQUIRED_TARGETS})
@@ -11,8 +11,9 @@ add_executable(tflite2circle ${DRIVER} ${SOURCES})
target_include_directories(tflite2circle PRIVATE include)
target_include_directories(tflite2circle PRIVATE src)
target_link_libraries(tflite2circle arser)
+target_link_libraries(tflite2circle foder)
target_link_libraries(tflite2circle safemain)
-target_link_libraries(tflite2circle mio_tflite)
+target_link_libraries(tflite2circle mio_tflite260)
target_link_libraries(tflite2circle mio_circle)
target_link_libraries(tflite2circle vconone)
target_link_libraries(tflite2circle nncc_coverage)
diff --git a/compiler/tflite2circle/driver/Driver.cpp b/compiler/tflite2circle/driver/Driver.cpp
index fc7ee4042..4015631ab 100644
--- a/compiler/tflite2circle/driver/Driver.cpp
+++ b/compiler/tflite2circle/driver/Driver.cpp
@@ -70,9 +70,9 @@ int entry(int argc, char **argv)
std::string circle_path = arser.get<std::string>("circle");
// read tflite file
tflite2circle::TFLModel tfl_model(tfl_path);
- if (!tfl_model.is_valid())
+ if (not tfl_model.verify_data())
{
- std::cerr << "ERROR: Failed to load tflite '" << tfl_path << "'" << std::endl;
+ std::cerr << "ERROR: Failed to verify tflite '" << tfl_path << "'" << std::endl;
return 255;
}
@@ -80,7 +80,7 @@ int entry(int argc, char **argv)
auto flatbuffer_builder = std::make_unique<flatbuffers::FlatBufferBuilder>(1024);
// convert tflite to circle
- tflite2circle::CircleModel circle_model{flatbuffer_builder, tfl_model};
+ tflite2circle::CircleModel circle_model{flatbuffer_builder, tfl_model.get_model()};
std::ofstream outfile{circle_path, std::ios::binary};
diff --git a/compiler/tflite2circle/include/CircleModel.h b/compiler/tflite2circle/include/CircleModel.h
index e1e35d8ff..14c4f1c12 100644
--- a/compiler/tflite2circle/include/CircleModel.h
+++ b/compiler/tflite2circle/include/CircleModel.h
@@ -63,12 +63,17 @@ private:
public:
Offset(void) = delete;
- Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec);
+ Offset(FlatBufBuilder &fb) : _fb{fb} {};
+
+public:
+ // TODO use _fb
+ void build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec);
public:
CIRFlatBufVecOffset offset(void) const { return _circle_flatbuffer_vec_offset; }
private:
+ FlatBufBuilder &_fb;
CIRFlatBufVecOffset _circle_flatbuffer_vec_offset;
};
@@ -79,7 +84,7 @@ private:
public:
CircleModel(void) = delete;
- CircleModel(FlatBufBuilder &fb, TFLModel &tfl_model);
+ CircleModel(FlatBufBuilder &fb, const tflite::Model *tfl_model);
public:
void model_build(void) const;
diff --git a/compiler/tflite2circle/include/TFLModel.h b/compiler/tflite2circle/include/TFLModel.h
index e53d62749..507667bb9 100644
--- a/compiler/tflite2circle/include/TFLModel.h
+++ b/compiler/tflite2circle/include/TFLModel.h
@@ -37,15 +37,14 @@ public:
TFLModel(const std::string &path);
public:
- bool is_valid(void) { return _valid; }
+ const tflite::Model *get_model(void);
-private:
- const tflite::Model *load_model(void);
+public:
+ bool verify_data(void);
private:
std::ifstream _infile;
DataBuffer _data;
- bool _valid;
friend class CircleModel;
};
diff --git a/compiler/tflite2circle/requires.cmake b/compiler/tflite2circle/requires.cmake
index 837c287b6..e39f9eeaf 100644
--- a/compiler/tflite2circle/requires.cmake
+++ b/compiler/tflite2circle/requires.cmake
@@ -1,5 +1,6 @@
require("arser")
-require("mio-tflite")
+require("foder")
+require("mio-tflite260")
require("mio-circle")
require("safemain")
require("vconone")
diff --git a/compiler/tflite2circle/src/CircleModel.cpp b/compiler/tflite2circle/src/CircleModel.cpp
index ab0b5b507..4249f1560 100644
--- a/compiler/tflite2circle/src/CircleModel.cpp
+++ b/compiler/tflite2circle/src/CircleModel.cpp
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include <cassert>
#include <iostream>
#include <memory>
@@ -24,7 +25,8 @@ namespace tflite2circle
{
template <>
-Offset<MetaDataBufferLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
+void Offset<MetaDataBufferLink>::build(FlatBufBuilder &fb,
+ const TFLFlatBufVec *tflite_flatbuffer_vec)
{
if (tflite_flatbuffer_vec == nullptr)
return;
@@ -34,7 +36,7 @@ Offset<MetaDataBufferLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tfli
}
template <>
-Offset<BufferLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
+void Offset<BufferLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
{
std::vector<flatbuffers::Offset<circle::Buffer>> buffers_vec;
@@ -55,7 +57,7 @@ Offset<BufferLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatb
}
template <>
-Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
+void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
{
std::vector<flatbuffers::Offset<circle::SubGraph>> subgprahs_vec;
@@ -278,8 +280,19 @@ Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_fla
_circle_flatbuffer_vec_offset = fb->CreateVector(subgprahs_vec);
}
+tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode)
+{
+ assert(opcode != nullptr);
+ int8_t dp_code = opcode->deprecated_builtin_code();
+ // 127 is the max of int8_t, which is the upper bound of the v3 builtin_code
+ // NOTE TensorFlow uses 'BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES' for 127
+ if (dp_code < 127 && dp_code >= 0)
+ return tflite::BuiltinOperator(dp_code);
+ return opcode->builtin_code();
+}
+
template <>
-Offset<OperatorCodeLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
+void Offset<OperatorCodeLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
{
std::vector<flatbuffers::Offset<circle::OperatorCode>> operator_code_vec;
@@ -287,7 +300,9 @@ Offset<OperatorCodeLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
{
auto custom_code = fb->CreateString(it->custom_code());
circle::OperatorCodeBuilder operator_code_builder{*fb};
- operator_code_builder.add_builtin_code(get_circle_builtin_code(it->builtin_code()));
+ // TODO support circle deprecated_builtin_code
+ auto bt_code = builtin_code_neutral(it);
+ operator_code_builder.add_builtin_code(get_circle_builtin_code(bt_code));
operator_code_builder.add_custom_code(custom_code);
operator_code_builder.add_version(it->version());
auto code = operator_code_builder.Finish();
@@ -296,24 +311,19 @@ Offset<OperatorCodeLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
_circle_flatbuffer_vec_offset = fb->CreateVector(operator_code_vec);
}
-CircleModel::CircleModel(FlatBufBuilder &fb, TFLModel &model)
- : _version{0}, _description{fb->CreateString("nnpackage")}, _fb{fb}
+CircleModel::CircleModel(FlatBufBuilder &fb, const tflite::Model *tfl_model)
+ : _version{0}, _description{fb->CreateString("ONE-tflite2circle")}, _fb{fb}
{
- const tflite::Model *tfl_model = model.load_model();
- // verify flatbuffers
- flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model._data.data()),
- model._data.size()};
- if (!tflite::VerifyModelBuffer(verifier))
- {
- throw std::runtime_error("Failed to verify tflite");
- }
+ _operator_codes_offset = std::make_unique<Offset<OperatorCodeLink>>(fb);
+ _subGraphs_offset = std::make_unique<Offset<SubGraphLink>>(fb);
+ _buffers_offset = std::make_unique<Offset<BufferLink>>(fb);
+ _metadata_buffer_offset = std::make_unique<Offset<MetaDataBufferLink>>(fb);
+
+ _operator_codes_offset->build(fb, tfl_model->operator_codes());
+ _subGraphs_offset->build(fb, tfl_model->subgraphs());
+ _buffers_offset->build(fb, tfl_model->buffers());
+ _metadata_buffer_offset->build(fb, tfl_model->metadata_buffer());
- _operator_codes_offset =
- std::make_unique<Offset<OperatorCodeLink>>(fb, tfl_model->operator_codes());
- _subGraphs_offset = std::make_unique<Offset<SubGraphLink>>(fb, tfl_model->subgraphs());
- _buffers_offset = std::make_unique<Offset<BufferLink>>(fb, tfl_model->buffers());
- _metadata_buffer_offset =
- std::make_unique<Offset<MetaDataBufferLink>>(fb, tfl_model->metadata_buffer());
model_build();
}
diff --git a/compiler/tflite2circle/src/TFLModel.cpp b/compiler/tflite2circle/src/TFLModel.cpp
index 33f11fb83..470b1aec7 100644
--- a/compiler/tflite2circle/src/TFLModel.cpp
+++ b/compiler/tflite2circle/src/TFLModel.cpp
@@ -16,6 +16,8 @@
#include <iostream>
+#include <foder/FileLoader.h>
+
#include "TFLModel.h"
namespace tflite2circle
@@ -23,21 +25,21 @@ namespace tflite2circle
TFLModel::TFLModel(const std::string &path)
{
- _infile.open(path, std::ios::binary | std::ios::in);
- _valid = _infile.good();
+ foder::FileLoader file_loader{path};
+ _data = file_loader.load();
}
-const tflite::Model *TFLModel::load_model(void)
+bool TFLModel::verify_data(void)
{
- assert(_valid == true);
- _infile.seekg(0, std::ios::end);
- auto fileSize = _infile.tellg();
- _infile.seekg(0, std::ios::beg);
- _data.resize(fileSize);
- _infile.read(_data.data(), fileSize);
- _infile.close();
-
- return tflite::GetModel(_data.data());
+ // verify flatbuffers
+ flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(_data.data()), _data.size()};
+ if (not tflite::VerifyModelBuffer(verifier))
+ {
+ return false;
+ }
+ return true;
}
+const tflite::Model *TFLModel::get_model(void) { return tflite::GetModel(_data.data()); }
+
} // namespace tflite2circle
diff --git a/compiler/vconone/CMakeLists.txt b/compiler/vconone/CMakeLists.txt
index 1cf7c0c45..50ee05242 100644
--- a/compiler/vconone/CMakeLists.txt
+++ b/compiler/vconone/CMakeLists.txt
@@ -1,5 +1,5 @@
if (NOT VCONONE_VERSION)
- set(VCONONE_VERSION 0x0000000100110000)
+ set(VCONONE_VERSION 0x0000000000120001)
# NOTE order is [build patch minor major]
# if VCONONE_VERSION is set with -D option, it will be cached
# you may have to remove cache file if you remove -D option
diff --git a/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp b/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
index 1a180a35b..e15dc2685 100644
--- a/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
+++ b/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
@@ -83,10 +83,6 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map
{"topkv2_find_first_negative", "topkv2.cl"},
{"topkv2_reorder_negatives", "topkv2.cl"},
{"topkv2_store", "topkv2.cl"},
- {"radixsort_histogram", "topkv2_radixsort.cl"},
- {"radixsort_scanhistograms", "topkv2_radixsort.cl"},
- {"radixsort_pastehistograms", "topkv2_radixsort.cl"},
- {"radixsort_reorder", "topkv2_radixsort.cl"},
{"topkv2_quicksort", "topkv2_quicksort.cl"},
{"scale_factor_symm8", "scale_factor.cl"},
};
@@ -186,10 +182,6 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map
#include "./cl_kernels/topkv2.clembed"
},
{
- "topkv2_radixsort.cl",
-#include "./cl_kernels/topkv2_radixsort.clembed"
- },
- {
"topkv2_quicksort.cl",
#include "./cl_kernels/topkv2_quicksort.clembed"
},
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl
deleted file mode 100644
index e9d4696b4..000000000
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl
+++ /dev/null
@@ -1,292 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// reference:
-// https://code.google.com/archive/p/ocl-radix-sort/source/default/source
-// OpenCL kernel sources for the CLRadixSort class
-// the #include does not exist in OpenCL
-// Copyright Philippe Helluy, Université de Strasbourg, France, 2011, helluy@math.unistra.fr
-// licensed under the GNU Lesser General Public License see http://www.gnu.org/copyleft/lesser.html
-// if you find this software usefull you can cite the following work in your reports or articles:
-// Philippe HELLUY, A portable implementation of the radix sort algorithm in OpenCL, 2011.
-// http://hal.archives-ouvertes.fr/hal-00596730
-
-// Reference for floating point radix sort:
-// http://www.codercorner.com/RadixSortRevisited.htm
-
-// compute the histogram for each radix and each virtual processor for the pass
-__kernel void radixsort_histogram(__global float *in_key_buf, __global int *d_Histograms,
- const int pass, __local int *loc_histo, const int n)
-{
- int it = get_local_id(0); // i local number of the processor
- int ig = get_global_id(0); // global number = i + g I
-
- int gr = get_group_id(0); // g group number
-
- int groups = get_num_groups(0);
- int items = get_local_size(0);
-
- // set the local histograms to zero
- for (int ir = 0; ir < _RADIX; ir++)
- {
- loc_histo[ir * items + it] = 0;
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- // range of keys that are analyzed by the work item
- int size = n / groups / items; // size of the sub-list
- int start = ig * size; // beginning of the sub-list
-
- unsigned int key;
- int shortkey, k;
-
- // compute the index
- // the computation depends on the transposition
- for (int j = 0; j < size; j++)
- {
-#ifdef TRANSPOSE
- k = groups * items * j + ig;
-#else
- k = j + start;
-#endif
-
- key = *((__global unsigned int *)(in_key_buf + k));
-
- // extract the group of _BITS bits of the pass
- // the result is in the range 0.._RADIX-1
- shortkey = ((key >> (pass * _BITS)) & (_RADIX - 1));
-
- // increment the local histogram
- loc_histo[shortkey * items + it]++;
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- // copy the local histogram to the global one
- for (int ir = 0; ir < _RADIX; ir++)
- {
- d_Histograms[items * (ir * groups + gr) + it] = loc_histo[ir * items + it];
- }
-
- barrier(CLK_GLOBAL_MEM_FENCE);
-}
-
-// initial transpose of the list for improving
-// coalescent memory access
-__kernel void transpose(const __global int *invect, __global int *outvect, const int nbcol,
- const int nbrow, const __global int *inperm, __global int *outperm,
- __local int *blockmat, __local int *blockperm, const int tilesize)
-{
-
- int i0 = get_global_id(0) * tilesize; // first row index
- int j = get_global_id(1); // column index
-
- int jloc = get_local_id(1); // local column index
-
- // fill the cache
- for (int iloc = 0; iloc < tilesize; iloc++)
- {
- int k = (i0 + iloc) * nbcol + j; // position in the matrix
- blockmat[iloc * tilesize + jloc] = invect[k];
-#ifdef PERMUT
- blockperm[iloc * tilesize + jloc] = inperm[k];
-#endif
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- // first row index in the transpose
- int j0 = get_group_id(1) * tilesize;
-
- // put the cache at the good place
- for (int iloc = 0; iloc < tilesize; iloc++)
- {
- int kt = (j0 + iloc) * nbrow + i0 + jloc; // position in the transpose
- outvect[kt] = blockmat[jloc * tilesize + iloc];
-#ifdef PERMUT
- outperm[kt] = blockperm[jloc * tilesize + iloc];
-#endif
- }
-}
-
-// each virtual processor reorders its data using the scanned histogram
-__kernel void radixsort_reorder(__global float *in_key, __global float *out_key,
- __global int *d_Histograms, const int pass,
- __global int *indices_in, __global int *indices_out,
- __local int *loc_histo, const int n)
-{
-
- int it = get_local_id(0);
- int ig = get_global_id(0);
-
- int gr = get_group_id(0);
- int groups = get_num_groups(0);
- int items = get_local_size(0);
-
- int start = ig * (n / groups / items);
- int size = n / groups / items;
-
- // take the histogram in the cache
- for (int ir = 0; ir < _RADIX; ir++)
- {
- loc_histo[ir * items + it] = d_Histograms[items * (ir * groups + gr) + it];
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-
- int newpos, shortkey, k, newpost;
- unsigned int key;
-
- for (int j = 0; j < size; j++)
- {
-#ifdef TRANSPOSE
- k = groups * items * j + ig;
-#else
- k = j + start;
-#endif
- float org_value = in_key[k];
- key = *(__global unsigned int *)(in_key + k);
- shortkey = ((key >> (pass * _BITS)) & (_RADIX - 1));
-
- newpos = loc_histo[shortkey * items + it];
-
-#ifdef TRANSPOSE
- int ignew, jnew;
- ignew = newpos / (n / groups / items);
- jnew = newpos % (n / groups / items);
- newpost = jnew * (groups * items) + ignew;
-#else
- newpost = newpos;
-#endif
-
- // d_outKeys[newpost]= key; // killing line !!!
- out_key[newpost] = org_value;
-
-#ifdef PERMUT
- indices_out[newpost] = indices_in[k];
-#endif
-
- newpos++;
- loc_histo[shortkey * items + it] = newpos;
- }
-}
-
-// perform a parallel prefix sum (a scan) on the local histograms
-// (see Blelloch 1990) each workitem worries about two memories
-// see also http://http.developer.nvidia.com/GPUGems3/gpugems3_ch39.html
-__kernel void radixsort_scanhistograms(__global int *histo, __local int *temp,
- __global int *globsum)
-{
- int it = get_local_id(0);
- int ig = get_global_id(0);
- int decale = 1;
- int n = get_local_size(0) * 2;
- int gr = get_group_id(0);
-
- // load input into local memory
- // up sweep phase
- temp[2 * it] = histo[2 * ig];
- temp[2 * it + 1] = histo[2 * ig + 1];
-
- // parallel prefix sum (algorithm of Blelloch 1990)
- for (int d = n >> 1; d > 0; d >>= 1)
- {
- barrier(CLK_LOCAL_MEM_FENCE);
- if (it < d)
- {
- int ai = decale * (2 * it + 1) - 1;
- int bi = decale * (2 * it + 2) - 1;
- temp[bi] += temp[ai];
- }
- decale *= 2;
- }
-
- // store the last element in the global sum vector
- // (maybe used in the next step for constructing the global scan)
- // clear the last element
- if (it == 0)
- {
- globsum[gr] = temp[n - 1];
- temp[n - 1] = 0;
- }
-
- // down sweep phase
- for (int d = 1; d < n; d *= 2)
- {
- decale >>= 1;
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if (it < d)
- {
- int ai = decale * (2 * it + 1) - 1;
- int bi = decale * (2 * it + 2) - 1;
-
- int t = temp[ai];
- temp[ai] = temp[bi];
- temp[bi] += t;
- }
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-
- // write results to device memory
-
- histo[2 * ig] = temp[2 * it];
- histo[2 * ig + 1] = temp[2 * it + 1];
-
- barrier(CLK_GLOBAL_MEM_FENCE);
-}
-
-// use the global sum for updating the local histograms
-// each work item updates two values
-__kernel void radixsort_pastehistograms(__global int *histo, __global int *globsum)
-{
- int ig = get_global_id(0);
- int gr = get_group_id(0);
-
- int s;
-
- s = globsum[gr];
-
- // write results to device memory
- histo[2 * ig] += s;
- histo[2 * ig + 1] += s;
-
- barrier(CLK_GLOBAL_MEM_FENCE);
-}
diff --git a/docs/conf.py b/docs/conf.py
index ea17db054..b59cab878 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -21,7 +21,7 @@ copyright = '2020, Samsung Research & contributors'
author = 'Samsung Research & contributors'
# The full version, including alpha/beta/rc tags
-release = '1.17.0'
+release = '1.18.0'
# -- General configuration ---------------------------------------------------
diff --git a/docs/release/1.18/index.rst b/docs/release/1.18/index.rst
new file mode 100644
index 000000000..71c46585a
--- /dev/null
+++ b/docs/release/1.18/index.rst
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+ sphinx-quickstart on Fri Oct 20 15:20:12 2021.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+1.18
+====
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ ./release-note-1.18.0.md
diff --git a/docs/release/1.18/release-note-1.18.0.md b/docs/release/1.18/release-note-1.18.0.md
new file mode 100644
index 000000000..a10f10e37
--- /dev/null
+++ b/docs/release/1.18/release-note-1.18.0.md
@@ -0,0 +1,11 @@
+# Release Note 1.18.0
+
+## ONE Compiler
+
+### Compiler Frontend
+
+- More optimization passes
+ - Fold DepthwiseConv2D
+ - Substitute SplitV to Split
+ - Expand BroadCast Const
+ - Force QuantParam
diff --git a/infra/cmake/modules/ExternalSourceTools.cmake b/infra/cmake/modules/ExternalSourceTools.cmake
index 0bfbaa33b..c8ca57520 100644
--- a/infra/cmake/modules/ExternalSourceTools.cmake
+++ b/infra/cmake/modules/ExternalSourceTools.cmake
@@ -103,7 +103,13 @@ function(ExternalSource_Download PREFIX)
message(STATUS "Extract ${PREFIX}")
execute_process(COMMAND ${CMAKE_COMMAND} -E tar xfz "${DOWNLOAD_PATH}"
- WORKING_DIRECTORY "${TMP_DIR}")
+ WORKING_DIRECTORY "${TMP_DIR}"
+ ERROR_VARIABLE EXTRACTION_ERROR)
+
+ if(EXTRACTION_ERROR)
+ message(FATAL_ERROR "Extract ${PREFIX} - failed")
+ endif(EXTRACTION_ERROR)
+
file(REMOVE "${DOWNLOAD_PATH}")
message(STATUS "Extract ${PREFIX} - done")
diff --git a/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake
new file mode 100644
index 000000000..b48239f2a
--- /dev/null
+++ b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake
@@ -0,0 +1,13 @@
+function(_CMSISSource_import)
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(CMSIS_5_8_0_URL https://github.com/ARM-software/CMSIS_5/archive/refs/tags/5.8.0.tar.gz)
+
+ ExternalSource_Download(CMSIS DIRNAME CMSIS-5.8.0 ${CMSIS_5_8_0_URL})
+
+ set(CMSISSource_DIR ${CMSIS_SOURCE_DIR} PARENT_SCOPE)
+ set(CMSISSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_CMSISSource_import)
+
+_CMSISSource_import()
diff --git a/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfigVersion.cmake b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfigVersion.cmake
new file mode 100644
index 000000000..ca6f7826d
--- /dev/null
+++ b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "5.8.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake
new file mode 100644
index 000000000..0eb8eb91c
--- /dev/null
+++ b/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake
@@ -0,0 +1,118 @@
+function(_FlatBuffers_import)
+ find_package(Flatbuffers QUIET)
+ set(FlatBuffers_FOUND ${Flatbuffers_FOUND} PARENT_SCOPE)
+endfunction(_FlatBuffers_import)
+
+function(_FlatBuffers_build)
+ if(NOT BUILD_FLATBUFFERS)
+ message(STATUS "FlatBuffersConfig skip: BUILD_FLATBUFFERS OFF")
+ return()
+ endif(NOT BUILD_FLATBUFFERS)
+
+ nnas_find_package(FlatBuffersSource EXACT 1.10 QUIET)
+
+ if(NOT FlatBuffersSource_FOUND)
+ # Source is not available
+ message(STATUS "FlatBuffersConfig skip: FlatBuffersSource not found")
+ return()
+ endif(NOT FlatBuffersSource_FOUND)
+
+ set(ADDITIONAL_CXX_FLAGS "")
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.0)
+ set(ADDITIONAL_CXX_FLAGS "-Wno-error=class-memaccess")
+ endif()
+
+ nnas_include(ExternalBuildTools)
+ ExternalBuild_CMake(CMAKE_DIR ${FlatBuffersSource_DIR}
+ BUILD_DIR ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.10/build
+ INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.10
+ BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS}
+ IDENTIFIER "1.10-fix4"
+ EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF -DPOSITION_INDEPENDENT_CODE:BOOL=ON"
+ PKG_NAME "FLATBUFFERS-1.10")
+
+endfunction(_FlatBuffers_build)
+
+_FlatBuffers_build()
+_FlatBuffers_import()
+
+if(FlatBuffers_FOUND)
+ if(NOT TARGET flatbuffers-1.10)
+ add_library(flatbuffers-1.10 INTERFACE)
+ target_link_libraries(flatbuffers-1.10 INTERFACE flatbuffers::flatbuffers)
+ message(STATUS "Found FlatBuffers-1.10: TRUE")
+ endif(NOT TARGET flatbuffers-1.10)
+
+ function(FlatBuffers_Generate PREFIX OUTPUT_DIR SCHEMA_DIR)
+ get_filename_component(abs_output_dir ${OUTPUT_DIR} ABSOLUTE)
+ get_filename_component(abs_schema_dir ${SCHEMA_DIR} ABSOLUTE)
+
+ foreach(schema ${ARGN})
+ get_filename_component(schema_fn "${schema}" NAME)
+ get_filename_component(dir "${schema}" DIRECTORY)
+
+ get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
+
+ list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
+ list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
+ endforeach()
+
+ add_custom_command(OUTPUT ${OUTPUT_FILES}
+ COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
+ COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes
+ --no-union-value-namespacing
+ --gen-object-api -o "${abs_output_dir}"
+ ${SCHEMA_FILES}
+ DEPENDS flatbuffers::flatc)
+
+ set(${PREFIX}_SOURCES ${OUTPUT_FILES} PARENT_SCOPE)
+ set(${PREFIX}_INCLUDE_DIRS ${abs_output_dir} PARENT_SCOPE)
+ endfunction(FlatBuffers_Generate)
+
+ function(FlatBuffers_Target TGT)
+ set(oneValueArgs OUTPUT_DIR SCHEMA_DIR INCLUDE_DIR)
+ set(multiValueArgs SCHEMA_FILES)
+ cmake_parse_arguments(ARG "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+ # Use OUTPUT_DIR as INCLUDE_DIR if INCLUDE_DIR is not specified
+ if(NOT ARG_INCLUDE_DIR)
+ set(ARG_INCLUDE_DIR ${ARG_OUTPUT_DIR})
+ endif(NOT ARG_INCLUDE_DIR)
+
+ get_filename_component(abs_output_dir ${ARG_OUTPUT_DIR} ABSOLUTE)
+ get_filename_component(abs_include_dir ${ARG_INCLUDE_DIR} ABSOLUTE)
+ get_filename_component(abs_schema_dir ${ARG_SCHEMA_DIR} ABSOLUTE)
+
+ # Let's reset list variables before using them
+ # NOTE THIS DOES NOT AFFECT parent scope
+ unset(SCHEMA_FILES)
+ unset(OUTPUT_FILES)
+
+ foreach(schema ${ARG_SCHEMA_FILES})
+ get_filename_component(schema_fn "${schema}" NAME)
+ get_filename_component(dir "${schema}" DIRECTORY)
+
+ get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
+
+ list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
+ list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
+ endforeach()
+
+ # Generate headers
+ add_custom_command(OUTPUT ${OUTPUT_FILES}
+ COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
+ COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes
+ --no-union-value-namespacing
+ --gen-object-api -o "${abs_output_dir}"
+ ${SCHEMA_FILES}
+ DEPENDS ${SCHEMA_FILES}
+ COMMENT "Generate '${TGT}' headers")
+
+ # NOTE This header-only library is deliberately declared as STATIC library
+ # to avoid possible scope issues related with generated files
+ add_library(${TGT} STATIC ${OUTPUT_FILES})
+ set_target_properties(${TGT} PROPERTIES LINKER_LANGUAGE CXX)
+ target_include_directories(${TGT} PUBLIC "${ARG_INCLUDE_DIR}")
+ target_link_libraries(${TGT} PUBLIC flatbuffers-1.10)
+ endfunction(FlatBuffers_Target)
+endif(FlatBuffers_FOUND)
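For reference, the FlatBuffers_Target helper defined above is typically used along these lines (a minimal sketch; the target name, directories, and schema file are illustrative and not taken from this patch):

    # Generate headers from example.fbs and wrap them in a linkable target
    FlatBuffers_Target(example_fbs
                       OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
                       SCHEMA_DIR "${CMAKE_CURRENT_SOURCE_DIR}"
                       SCHEMA_FILES "example.fbs")
    # Consumers then link against the generated target
    target_link_libraries(example_tool PRIVATE example_fbs)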
diff --git a/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfigVersion.cmake b/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfigVersion.cmake
new file mode 100644
index 000000000..6585f21d5
--- /dev/null
+++ b/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "1.10")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake
new file mode 100644
index 000000000..daa749c58
--- /dev/null
+++ b/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake
@@ -0,0 +1,118 @@
+function(_FlatBuffers_import)
+ find_package(Flatbuffers QUIET)
+ set(FlatBuffers_FOUND ${Flatbuffers_FOUND} PARENT_SCOPE)
+endfunction(_FlatBuffers_import)
+
+function(_FlatBuffers_build)
+ if(NOT BUILD_FLATBUFFERS)
+ message(STATUS "FlatBuffersConfig !BUILD_FLATBUFFERS")
+ return()
+ endif(NOT BUILD_FLATBUFFERS)
+
+ nnas_find_package(FlatBuffersSource EXACT 1.12 QUIET)
+
+ if(NOT FlatBuffersSource_FOUND)
+ # Source is not available
+ message(STATUS "FlatBuffersConfig !FlatBuffersSource_FOUND")
+ return()
+ endif(NOT FlatBuffersSource_FOUND)
+
+ set(ADDITIONAL_CXX_FLAGS "")
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.0)
+ set(ADDITIONAL_CXX_FLAGS "-Wno-error=class-memaccess")
+ endif()
+
+ nnas_include(ExternalBuildTools)
+ ExternalBuild_CMake(CMAKE_DIR ${FlatBuffersSource_DIR}
+ BUILD_DIR ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.12/build
+ INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.12
+ BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS}
+ IDENTIFIER "1.12-fix1"
+ EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF -DPOSITION_INDEPENDENT_CODE:BOOL=ON"
+ PKG_NAME "FLATBUFFERS-1.12")
+
+endfunction(_FlatBuffers_build)
+
+_FlatBuffers_build()
+_FlatBuffers_import()
+
+if(FlatBuffers_FOUND)
+ if(NOT TARGET flatbuffers-1.12)
+ add_library(flatbuffers-1.12 INTERFACE)
+ target_link_libraries(flatbuffers-1.12 INTERFACE flatbuffers::flatbuffers)
+ message(STATUS "Found FlatBuffers-1.12: TRUE")
+ endif(NOT TARGET flatbuffers-1.12)
+
+ function(FlatBuffers_Generate PREFIX OUTPUT_DIR SCHEMA_DIR)
+ get_filename_component(abs_output_dir ${OUTPUT_DIR} ABSOLUTE)
+ get_filename_component(abs_schema_dir ${SCHEMA_DIR} ABSOLUTE)
+
+ foreach(schema ${ARGN})
+ get_filename_component(schema_fn "${schema}" NAME)
+ get_filename_component(dir "${schema}" DIRECTORY)
+
+ get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
+
+ list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
+ list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
+ endforeach()
+
+ add_custom_command(OUTPUT ${OUTPUT_FILES}
+ COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
+ COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes
+ --no-union-value-namespacing
+ --gen-object-api -o "${abs_output_dir}"
+ ${SCHEMA_FILES}
+ DEPENDS flatbuffers::flatc)
+
+ set(${PREFIX}_SOURCES ${OUTPUT_FILES} PARENT_SCOPE)
+ set(${PREFIX}_INCLUDE_DIRS ${abs_output_dir} PARENT_SCOPE)
+ endfunction(FlatBuffers_Generate)
+
+ function(FlatBuffers_Target TGT)
+ set(oneValueArgs OUTPUT_DIR SCHEMA_DIR INCLUDE_DIR)
+ set(multiValueArgs SCHEMA_FILES)
+ cmake_parse_arguments(ARG "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+ # Use OUTPUT_DIR as INCLUDE_DIR if INCLUDE_DIR is not specified
+ if(NOT ARG_INCLUDE_DIR)
+ set(ARG_INCLUDE_DIR ${ARG_OUTPUT_DIR})
+ endif(NOT ARG_INCLUDE_DIR)
+
+ get_filename_component(abs_output_dir ${ARG_OUTPUT_DIR} ABSOLUTE)
+ get_filename_component(abs_include_dir ${ARG_INCLUDE_DIR} ABSOLUTE)
+ get_filename_component(abs_schema_dir ${ARG_SCHEMA_DIR} ABSOLUTE)
+
+ # Let's reset list variables before using them
+ # NOTE THIS DOES NOT AFFECT parent scope
+ unset(SCHEMA_FILES)
+ unset(OUTPUT_FILES)
+
+ foreach(schema ${ARG_SCHEMA_FILES})
+ get_filename_component(schema_fn "${schema}" NAME)
+ get_filename_component(dir "${schema}" DIRECTORY)
+
+ get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
+
+ list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
+ list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
+ endforeach()
+
+ # Generate headers
+ add_custom_command(OUTPUT ${OUTPUT_FILES}
+ COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
+ COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes
+ --no-union-value-namespacing
+ --gen-object-api -o "${abs_output_dir}"
+ ${SCHEMA_FILES}
+ DEPENDS ${SCHEMA_FILES}
+ COMMENT "Generate '${TGT}' headers")
+
+ # NOTE This header-only library is deliberately declared as STATIC library
+ # to avoid possible scope issues related with generated files
+ add_library(${TGT} STATIC ${OUTPUT_FILES})
+ set_target_properties(${TGT} PROPERTIES LINKER_LANGUAGE CXX)
+ target_include_directories(${TGT} PUBLIC "${ARG_INCLUDE_DIR}")
+ target_link_libraries(${TGT} PUBLIC flatbuffers-1.12)
+ endfunction(FlatBuffers_Target)
+endif(FlatBuffers_FOUND)
diff --git a/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfigVersion.cmake b/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfigVersion.cmake
new file mode 100644
index 000000000..8cfdbf8e5
--- /dev/null
+++ b/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "1.12")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffersConfig.cmake
index da084e7d3..e551e29c8 100644
--- a/infra/cmake/packages/FlatBuffersConfig.cmake
+++ b/infra/cmake/packages/FlatBuffersConfig.cmake
@@ -5,6 +5,7 @@ endfunction(_FlatBuffers_import)
function(_FlatBuffers_build)
if(NOT BUILD_FLATBUFFERS)
+ message(STATUS "FlatBuffersConfig skip: BUILD_FLATBUFFERS OFF")
return()
endif(NOT BUILD_FLATBUFFERS)
@@ -12,6 +13,7 @@ function(_FlatBuffers_build)
if(NOT FlatBuffersSource_FOUND)
# Source is not available
+ message(STATUS "FlatBuffersConfig skip: FlatBuffersSource not found")
return()
endif(NOT FlatBuffersSource_FOUND)
@@ -22,12 +24,12 @@ function(_FlatBuffers_build)
nnas_include(ExternalBuildTools)
ExternalBuild_CMake(CMAKE_DIR ${FlatBuffersSource_DIR}
- BUILD_DIR ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS/build
- INSTALL_DIR ${EXT_OVERLAY_DIR}
+ BUILD_DIR ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.10/build
+ INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.10
BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS}
- IDENTIFIER "1.10-fix2"
- EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF"
- PKG_NAME "FLATBUFFERS")
+ IDENTIFIER "1.10-fix4"
+ EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF -DPOSITION_INDEPENDENT_CODE:BOOL=ON"
+ PKG_NAME "FLATBUFFERS-1.10")
endfunction(_FlatBuffers_build)
@@ -35,11 +37,11 @@ _FlatBuffers_build()
_FlatBuffers_import()
if(FlatBuffers_FOUND)
- if(NOT TARGET flatbuffers)
- add_library(flatbuffers INTERFACE)
- target_link_libraries(flatbuffers INTERFACE flatbuffers::flatbuffers)
- message(STATUS "Found FlatBuffers: TRUE")
- endif(NOT TARGET flatbuffers)
+ if(NOT TARGET flatbuffers-1.10)
+ add_library(flatbuffers-1.10 INTERFACE)
+ target_link_libraries(flatbuffers-1.10 INTERFACE flatbuffers::flatbuffers)
+ message(STATUS "Found FlatBuffers-1.10: TRUE")
+ endif(NOT TARGET flatbuffers-1.10)
function(FlatBuffers_Generate PREFIX OUTPUT_DIR SCHEMA_DIR)
get_filename_component(abs_output_dir ${OUTPUT_DIR} ABSOLUTE)
@@ -111,6 +113,6 @@ if(FlatBuffers_FOUND)
add_library(${TGT} STATIC ${OUTPUT_FILES})
set_target_properties(${TGT} PROPERTIES LINKER_LANGUAGE CXX)
target_include_directories(${TGT} PUBLIC "${ARG_INCLUDE_DIR}")
- target_link_libraries(${TGT} PUBLIC flatbuffers)
+ target_link_libraries(${TGT} PUBLIC flatbuffers-1.10)
endfunction(FlatBuffers_Target)
endif(FlatBuffers_FOUND)
diff --git a/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake
new file mode 100644
index 000000000..8b1743066
--- /dev/null
+++ b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake
@@ -0,0 +1,13 @@
+function(_MbedOSSource_import)
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(MBEDOS_6_15_URL https://github.com/ARMmbed/mbed-os/archive/refs/tags/mbed-os-6.15.0.tar.gz)
+
+ ExternalSource_Download(MBEDOS DIRNAME MBEDOS-6.15 ${MBEDOS_6_15_URL})
+
+ set(MbedOSSource_DIR ${MBEDOS_SOURCE_DIR} PARENT_SCOPE)
+ set(MbedOSSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_MbedOSSource_import)
+
+_MbedOSSource_import()
diff --git a/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfigVersion.cmake b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfigVersion.cmake
new file mode 100644
index 000000000..acdd54ad6
--- /dev/null
+++ b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "6.15")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfig.cmake b/infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfig.cmake
new file mode 100644
index 000000000..a9ec75d34
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfig.cmake
@@ -0,0 +1,21 @@
+function(_TensorFlowEigenSource_import)
+ if(NOT DOWNLOAD_EIGEN)
+ set(TensorFlowEigenSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_EIGEN)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # Exact version used by TensorFlow v2.6.0.
+ # See tensorflow/third_party/eigen3/workspace.bzl.
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://gitlab.com")
+ envoption(TENSORFLOW_2_6_0_EIGEN_URL ${EXTERNAL_DOWNLOAD_SERVER}/libeigen/eigen/-/archive/12e8d57108c50d8a63605c6eb0144c838c128337/eigen-12e8d57108c50d8a63605c6eb0144c838c128337.tar.gz)
+
+ ExternalSource_Download(EIGEN DIRNAME TENSORFLOW-2.6.0-EIGEN ${TENSORFLOW_2_6_0_EIGEN_URL})
+
+ set(TensorFlowEigenSource_DIR ${EIGEN_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowEigenSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowEigenSource_import)
+
+_TensorFlowEigenSource_import()
diff --git a/infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfigVersion.cmake
new file mode 100644
index 000000000..38ad0aa31
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.6.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake
new file mode 100644
index 000000000..b7f3148e8
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake
@@ -0,0 +1,20 @@
+function(_TensorFlowGEMMLowpSource_import)
+ if(NOT DOWNLOAD_GEMMLOWP)
+ set(TensorFlowGEMMLowpSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_GEMMLOWP)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # Exact version used by TensorFlow v2.6.0.
+ # See tensorflow/third_party/gemmlowp/workspace.bzl.
+ envoption(TENSORFLOW_2_6_0_GEMMLOWP_URL https://github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip)
+
+ ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.6.0-GEMMLOWP ${TENSORFLOW_2_6_0_GEMMLOWP_URL})
+
+ set(TensorFlowGEMMLowpSource_DIR ${GEMMLOWP_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowGEMMLowpSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowGEMMLowpSource_import)
+
+_TensorFlowGEMMLowpSource_import()
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfigVersion.cmake
new file mode 100644
index 000000000..38ad0aa31
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.6.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake b/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake
new file mode 100644
index 000000000..b4dee914f
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake
@@ -0,0 +1,20 @@
+function(_TensorFlowRuySource_import)
+ if(NOT DOWNLOAD_RUY)
+ set(TensorFlowRuySource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_RUY)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ # Exact version used by TensorFlow v2.6.0.
+ # See tensorflow/third_party/ruy/workspace.bzl
+ envoption(TENSORFLOW_2_6_0_RUY_URL https://github.com/google/ruy/archive/e6c1b8dc8a8b00ee74e7268aac8b18d7260ab1ce.zip)
+
+ ExternalSource_Download(RUY DIRNAME TENSORFLOW-2.6.0-RUY ${TENSORFLOW_2_6_0_RUY_URL})
+
+ set(TensorFlowRuySource_DIR ${RUY_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowRuySource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowRuySource_import)
+
+_TensorFlowRuySource_import()
diff --git a/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfigVersion.cmake
new file mode 100644
index 000000000..38ad0aa31
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.6.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake
new file mode 100644
index 000000000..611c7c805
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake
@@ -0,0 +1,18 @@
+function(_TensorFlowSource_import)
+ if(NOT DOWNLOAD_TENSORFLOW)
+ set(TensorFlowSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_TENSORFLOW)
+
+ nnas_include(ExternalSourceTools)
+ nnas_include(OptionTools)
+
+ envoption(TENSORFLOW_2_6_0_URL https://github.com/tensorflow/tensorflow/archive/v2.6.0.tar.gz)
+
+ ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.6.0 ${TENSORFLOW_2_6_0_URL})
+
+ set(TensorFlowSource_DIR ${TENSORFLOW_SOURCE_DIR} PARENT_SCOPE)
+ set(TensorFlowSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowSource_import)
+
+_TensorFlowSource_import()
diff --git a/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfigVersion.cmake
new file mode 100644
index 000000000..38ad0aa31
--- /dev/null
+++ b/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfigVersion.cmake
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.6.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+ set(PACKAGE_VERSION_EXACT TRUE)
+ set(PACKAGE_VERSION_COMPATIBLE TRUE)
+ set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/debian/compiler/changelog b/infra/debian/compiler/changelog
index 6859255ff..12af5f928 100644
--- a/infra/debian/compiler/changelog
+++ b/infra/debian/compiler/changelog
@@ -1,3 +1,9 @@
+one (1.18.0) bionic; urgency=medium
+
+ * More optimization passes
+
+ -- seongwoo <mhs4670go@naver.com> Fri, 15 Oct 2021 15:23:20 +0900
+
one (1.17.0) bionic; urgency=medium
* More optimization pass
diff --git a/infra/debian/compiler/one-compiler.install b/infra/debian/compiler/one-compiler.install
index ba628545b..cbca47802 100644
--- a/infra/debian/compiler/one-compiler.install
+++ b/infra/debian/compiler/one-compiler.install
@@ -3,7 +3,6 @@
usr/bin/circle2circle usr/share/one/bin/
usr/bin/circle_partitioner usr/share/one/bin/
usr/bin/circle-quantizer usr/share/one/bin/
-usr/bin/conv_mixin_1.8.0.patch usr/share/one/bin/
usr/bin/generate_bcq_metadata.py usr/share/one/bin/
usr/bin/generate_bcq_output_arrays.py usr/share/one/bin/
usr/bin/model2nnpkg.sh usr/share/one/bin/
diff --git a/infra/debian/compiler/one-compiler.links b/infra/debian/compiler/one-compiler.links
index 8b6e542c1..9e464352a 100644
--- a/infra/debian/compiler/one-compiler.links
+++ b/infra/debian/compiler/one-compiler.links
@@ -13,4 +13,5 @@ usr/share/one/lib/libluci_log.so usr/lib/libluci_log.so
usr/share/one/lib/libluci_partition.so usr/lib/libluci_partition.so
usr/share/one/lib/libluci_pass.so usr/lib/libluci_pass.so
usr/share/one/lib/libluci_profile.so usr/lib/libluci_profile.so
+usr/share/one/lib/libluci_plan.so usr/lib/libluci_plan.so
usr/share/one/lib/libluci_service.so usr/lib/libluci_service.so
diff --git a/infra/debian/compiler/rules b/infra/debian/compiler/rules
index 21b956b2f..e42faae09 100755
--- a/infra/debian/compiler/rules
+++ b/infra/debian/compiler/rules
@@ -1,7 +1,7 @@
#!/usr/bin/make -f
export DH_VERBOSE = 1
export NNAS_BUILD_PREFIX = build
-export PRESET = 20210706
+export PRESET = 20210910
export _DESTDIR = debian/tmp/usr
%:
diff --git a/infra/debian/runtime/changelog b/infra/debian/runtime/changelog
index 4a41d959c..ee0d3e6ee 100644
--- a/infra/debian/runtime/changelog
+++ b/infra/debian/runtime/changelog
@@ -1,3 +1,9 @@
+one (1.18.0) bionic; urgency=low
+
+ * Synch up version with ONE Compiler
+
+ -- Chunseok Lee <chunseok.lee@samsung.com> Fri, 15 Oct 2021 15:23:00 +0900
+
one (1.17.0) bionic; urgency=low
* New gpu_gl backend supports the following operations : Add, Convolution, Depthwise Convolution, Pooling, Reshape, Relu, Softmax
diff --git a/infra/nncc/CMakeLists.txt b/infra/nncc/CMakeLists.txt
index eb279902e..bde684938 100644
--- a/infra/nncc/CMakeLists.txt
+++ b/infra/nncc/CMakeLists.txt
@@ -130,6 +130,11 @@ option(ENABLE_STRICT_BUILD "Treat warning as error" OFF)
# Check our ProtobufConfig.cmake for its usage.
option(USE_PROTOBUF_LEGACY_IMPORT "Use legacy MODULE mode import rather than CONFIG mode" OFF)
+# This option might be turned ON for MCU builds of luci-related components.
+# It specifies which library type to use for the build:
+# if set to ON, luci libraries are static; otherwise, shared.
+option(STATIC_LUCI "Build luci as static libraries" OFF)
+
###
### Target
###
diff --git a/infra/nncc/command/utcount b/infra/nncc/command/utcount
index 64aaace9b..65aea8bae 100644
--- a/infra/nncc/command/utcount
+++ b/infra/nncc/command/utcount
@@ -14,7 +14,7 @@ oops pepper-assert \
hermes hermes-std \
loco locop locomotiv logo-core logo \
foder souschef arser vconone crew \
-safemain mio-circle mio-tflite \
+safemain mio-circle mio-tflite mio-tflite260 \
tflite2circle \
luci \
luci-interpreter \
diff --git a/infra/packaging/build b/infra/packaging/build
index 8d3230010..53d63713b 100644
--- a/infra/packaging/build
+++ b/infra/packaging/build
@@ -8,7 +8,7 @@ if [[ -z "${NNAS_PROJECT_PATH}" ]]; then
fi
# The default preset
-PRESET="20210706"
+PRESET="20210910"
EXTRA_OPTIONS=()
while [ "$#" -ne 0 ]; do
diff --git a/infra/packaging/preset/20210910 b/infra/packaging/preset/20210910
new file mode 100644
index 000000000..d00b1ccad
--- /dev/null
+++ b/infra/packaging/preset/20210910
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# NOTE purpose of this file is static analysis only
+# new official preset will be added when new programs are ready
+
+PRESET="20210910"
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-circle")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
+ REQUIRED_UNITS+=("circle-tensordump" "circledump")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+ REQUIRED_UNITS+=("luci-eval-driver")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+ REQUIRED_UNITS+=("circle-partitioner")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+
+ NPROC=${NPROC:-$(cat /proc/cpuinfo | grep -c processor)}
+
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \
+ "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
+
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+}
diff --git a/infra/packaging/preset/20210910_windows b/infra/packaging/preset/20210910_windows
new file mode 100644
index 000000000..642bdbd76
--- /dev/null
+++ b/infra/packaging/preset/20210910_windows
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+function preset_configure()
+{
+ REQUIRED_UNITS=()
+ # Common Libraries
+ REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+ REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+ REQUIRED_UNITS+=("souschef")
+ REQUIRED_UNITS+=("safemain")
+ REQUIRED_UNITS+=("arser")
+ REQUIRED_UNITS+=("vconone")
+ # Hermes Logging Framework
+ REQUIRED_UNITS+=("hermes" "hermes-std")
+ # loco IR and related utilities
+ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+ # Flatbuffer I/O
+ REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-circle")
+ # Circle compiler library (.circle -> .circle)
+ REQUIRED_UNITS+=("luci")
+ # Tools
+ REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
+ REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+ REQUIRED_UNITS+=("luci-eval-driver")
+ REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+ REQUIRED_UNITS+=("circle-partitioner")
+ REQUIRED_UNITS+=("one-cmds")
+ REQUIRED_UNITS+=("bcq-tools")
+
+ NPROC=$(cat /proc/cpuinfo | grep -c processor)
+
+ # TODO Use "nncc configure" and "nncc build"
+ cmake \
+ -G "MSYS Makefiles" \
+ -DUSE_PROTOBUF_LEGACY_IMPORT=ON \
+ -DCMAKE_EXE_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DCMAKE_SHARED_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+ -DENABLE_TEST=OFF \
+ -DDOWNLOAD_GTEST=OFF \
+ -DBUILD_GTEST=OFF \
+ -DCMAKE_C_COMPILER=gcc \
+ -DCMAKE_CXX_COMPILER=g++ \
+ -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+ -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+ -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+ ${EXTRA_OPTIONS[@]} \
+ "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+ # Install libraries to bin/ for Windows release
+ mv ${NNCC_INSTALL_PREFIX}/lib/*.dll ${NNCC_INSTALL_PREFIX}/bin
+ rm -rf ${NNCC_INSTALL_PREFIX}/lib
+
+ install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \
+ "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
+
+ # Install tf2nnpkg
+ install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.20210910" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+
+ # Note: TensorFlow is required to run 'tf2tfliteV2', but it cannot be
+ # installed under MinGW. Install TensorFlow from a native Windows CMD
+ # (run as administrator) inside a Python virtual environment, then copy
+ # the environment to "${NNAS_INSTALL_PREFIX}/bin/venv"
+}
diff --git a/infra/packaging/res/tf2nnpkg.20210910 b/infra/packaging/res/tf2nnpkg.20210910
new file mode 100644
index 000000000..0d44818a1
--- /dev/null
+++ b/infra/packaging/res/tf2nnpkg.20210910
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+set -e
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+command_exists() {
+ if [ "$#" -le 0 ]; then
+ return 1
+ fi
+ command -v "$@" > /dev/null 2>&1
+}
+
+usage()
+{
+ echo "Convert TensorFlow model to nnpackage."
+ echo "Usage: tf2nnpkg"
+ echo " --info <path/to/info>"
+ echo " --graphdef <path/to/pb>"
+ echo " -o <path/to/nnpkg/directory>"
+ echo " --v2 (optional) Use TF 2.x interface"
+ exit 255
+}
+
+TF_INTERFACE="--v1"
+
+# Parse command-line arguments
+#
+while [ "$#" -ne 0 ]; do
+ CUR="$1"
+
+ case $CUR in
+ '--help')
+ usage
+ ;;
+ '--info')
+ export INFO_FILE="$2"
+ shift 2
+ ;;
+ '--graphdef')
+ export GRAPHDEF_FILE="$2"
+ shift 2
+ ;;
+ '-o')
+ export OUTPUT_DIR="$2"
+ shift 2
+ ;;
+ '--v2')
+ TF_INTERFACE="--v2"
+ shift
+ ;;
+ *)
+ echo "${CUR}"
+ shift
+ ;;
+ esac
+done
+
+if [ -z ${GRAPHDEF_FILE} ] || [ ! -e ${GRAPHDEF_FILE} ]; then
+ echo "pb is not found. Please check --graphdef is correct."
+ exit 2
+fi
+
+if [ -z ${INFO_FILE} ] || [ ! -e ${INFO_FILE} ]; then
+ echo "info is not found. Please check --info is correct."
+ exit 2
+fi
+
+if [ -z ${OUTPUT_DIR} ]; then
+ echo "output directory is not specifed. Please check -o is correct.."
+ exit 2
+fi
+
+FILE_BASE=$(basename ${GRAPHDEF_FILE})
+MODEL_NAME="${FILE_BASE%.*}"
+TMPDIR=$(mktemp -d)
+trap "{ rm -rf $TMPDIR; }" EXIT
+
+# activate python virtual environment
+VIRTUALENV_LINUX="${ROOT}/bin/venv/bin/activate"
+VIRTUALENV_WINDOWS="${ROOT}/bin/venv/Scripts/activate"
+
+if [ -e ${VIRTUALENV_LINUX} ]; then
+ source ${VIRTUALENV_LINUX}
+elif [ -e ${VIRTUALENV_WINDOWS} ]; then
+ source ${VIRTUALENV_WINDOWS}
+fi
+
+# parse inputs, outputs from info file
+INPUT=$(awk -F, '/^input/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+OUTPUT=$(awk -F, '/^output/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+
+INPUT_SHAPES=$(grep ^input ${INFO_FILE} | cut -d "[" -f2 | cut -d "]" -f1 | tr -d ' ' | xargs | tr ' ' ':')
+
+ONE_IMPORT_BCQ_SCRIPT="${ROOT}/bin/one-import-bcq ${TF_INTERFACE} "
+ONE_IMPORT_BCQ_SCRIPT+="-i ${GRAPHDEF_FILE} "
+ONE_IMPORT_BCQ_SCRIPT+="-o ${TMPDIR}/${MODEL_NAME}.tmp.circle "
+ONE_IMPORT_BCQ_SCRIPT+="-I ${INPUT} "
+ONE_IMPORT_BCQ_SCRIPT+="-O ${OUTPUT} "
+if [ ! -z ${INPUT_SHAPES} ]; then
+ ONE_IMPORT_BCQ_SCRIPT+="-s ${INPUT_SHAPES} "
+fi
+
+${ONE_IMPORT_BCQ_SCRIPT}
+
+# optimize
+"${ROOT}/bin/circle2circle" --O1 "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
+
+"${ROOT}/bin/model2nnpkg.sh" -o "${OUTPUT_DIR}" "${TMPDIR}/${MODEL_NAME}.circle"
diff --git a/infra/scripts/compiler_modules.sh b/infra/scripts/compiler_modules.sh
index a63140eaf..e520dd381 100644
--- a/infra/scripts/compiler_modules.sh
+++ b/infra/scripts/compiler_modules.sh
@@ -8,7 +8,7 @@ DEBUG_BUILD_ITEMS+=";oops;pepper-assert;pepper-csv2vec"
DEBUG_BUILD_ITEMS+=";hermes;hermes-std"
DEBUG_BUILD_ITEMS+=";loco;locop;locomotiv;logo-core;logo"
DEBUG_BUILD_ITEMS+=";foder;crew;souschef;arser;vconone"
-DEBUG_BUILD_ITEMS+=";safemain;mio-circle;mio-tflite"
+DEBUG_BUILD_ITEMS+=";safemain;mio-circle;mio-tflite;mio-tflite260"
DEBUG_BUILD_ITEMS+=";tflite2circle"
DEBUG_BUILD_ITEMS+=";luci"
DEBUG_BUILD_ITEMS+=";luci-interpreter"
diff --git a/infra/scripts/docker_collect_nnpkg_resources.sh b/infra/scripts/docker_collect_nnpkg_resources.sh
index 65963f4b8..475da6d06 100755
--- a/infra/scripts/docker_collect_nnpkg_resources.sh
+++ b/infra/scripts/docker_collect_nnpkg_resources.sh
@@ -71,7 +71,7 @@ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
# Circle compiler library (.circle -> .circle)
REQUIRED_UNITS+=("luci")
# Flatbuffer I/O
-REQUIRED_UNITS+=("mio-tflite" "mio-circle")
+REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-circle")
# Tools
REQUIRED_UNITS+=("tflite2circle" "circle2circle" "luci-interpreter")
REQUIRED_UNITS+=("souschef" "tflchef" "circlechef" "circle-verify")
diff --git a/packaging/nnfw.spec b/packaging/nnfw.spec
index 0d170e7ed..4133d7a06 100644
--- a/packaging/nnfw.spec
+++ b/packaging/nnfw.spec
@@ -1,9 +1,9 @@
Name: nnfw
Summary: nnfw
-Version: 1.17.0
+Version: 1.18.0
Release: 1
Group: Development
-License: Apache-2.0 and MIT and BSD-2-Clause
+License: Apache-2.0 and MIT and BSD-2-Clause and MPL-2.0
Source0: %{name}-%{version}.tar.gz
Source1: %{name}.manifest
diff --git a/res/TensorFlowLiteRecipes/PadV2_001/test.recipe b/res/TensorFlowLiteRecipes/PadV2_001/test.recipe
new file mode 100644
index 000000000..0eafec931
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/PadV2_001/test.recipe
@@ -0,0 +1,68 @@
+operand {
+ name: "ifm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "relu"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+ name: "padding"
+ type: INT32
+ shape { dim: 4 dim: 2 }
+ filler {
+ tag: "explicit"
+ arg: "0" arg: "0"
+ arg: "1" arg: "1"
+ arg: "1" arg: "1"
+ arg: "0" arg: "0"
+ }
+}
+operand {
+ name: "constant_values"
+ type: FLOAT32
+ shape { dim: 1 }
+ filler {
+ tag: "explicit"
+ arg: "-100.00"
+ }
+}
+operand {
+ name: "padv2"
+ type: FLOAT32
+ shape { dim: 1 dim: 5 dim: 5 dim: 2 }
+}
+operand {
+ name: "ofm"
+ type: FLOAT32
+ shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+ type: "ReLU"
+ input: "ifm"
+ output: "relu"
+}
+operation {
+ type: "PadV2"
+ input: "relu"
+ input: "padding"
+ input: "constant_values"
+ output: "padv2"
+}
+operation {
+ type: "MaxPool2D"
+ maxpool2d_options {
+ padding: VALID
+ stride_w: 1
+ stride_h: 1
+ filter_height: 3
+ filter_width: 3
+ }
+ input: "padv2"
+ output: "ofm"
+}
+
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/PadV2_001/test.rule b/res/TensorFlowLiteRecipes/PadV2_001/test.rule
new file mode 100644
index 000000000..29b080b1e
--- /dev/null
+++ b/res/TensorFlowLiteRecipes/PadV2_001/test.rule
@@ -0,0 +1,8 @@
+# To check if PadV2 is converted to Pad
+
+RULE "VERIFY_FILE_FORMAT" $(verify_file_format) '=' 1
+
+RULE "PAD_EXIST" $(op_count PAD) '=' 1
+RULE "MAXPOOL2D_EXIST" $(op_count MAX_POOL_2D) '=' 1
+RULE "RELU_EXIST" $(op_count RELU) '=' 1
+RULE "NO_PADV2" $(op_count PADV2) '=' 0
diff --git a/res/TensorFlowLiteSchema/2.6.0/schema.fbs b/res/TensorFlowLiteSchema/2.6.0/schema.fbs
new file mode 100644
index 000000000..6fc51f838
--- /dev/null
+++ b/res/TensorFlowLiteSchema/2.6.0/schema.fbs
@@ -0,0 +1,1240 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+// Version 2: Rename operators to conform to NN API.
+// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
+// Version 3a: Add new builtin op code field. Has backward compatibility with
+// version 3.
+
+namespace tflite;
+
+// This corresponds to the version.
+file_identifier "TFL3";
+// File extension of any written files.
+file_extension "tflite";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+ FLOAT32 = 0,
+ FLOAT16 = 1,
+ INT32 = 2,
+ UINT8 = 3,
+ INT64 = 4,
+ STRING = 5,
+ BOOL = 6,
+ INT16 = 7,
+ COMPLEX64 = 8,
+ INT8 = 9,
+ FLOAT64 = 10,
+ COMPLEX128 = 11,
+ UINT64 = 12,
+ // Experimental: Resource and variant types are experimental and subject
+ // to change. Do not implement custom kernels using resource & variant types
+ // now.
+ RESOURCE = 13,
+ VARIANT = 14,
+ UINT32 = 15,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+ custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+ CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+ // These four parameters are the asymmetric linear quantization parameters.
+ // Given a quantized value q, the corresponding float value f should be:
+ // f = scale * (q - zero_point)
+ // For other quantization types, the QuantizationDetails below is used.
+ min:[float]; // For importing back into tensorflow.
+ max:[float]; // For importing back into tensorflow.
+ scale:[float]; // For dequantizing the tensor's values.
+ zero_point:[long];
+
+ // If this is not none, the other quantization parameters (i.e. min, max,
+ // scale, zero_point fields above) are ignored and the value of the
+ // QuantizationDetails union should be used.
+ details:QuantizationDetails;
+
+ // Specifies the dimension of the Tensor's shape that the scales and
+ // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+ // with quantization params:
+ // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+ // will be quantized across the second dimension of t.
+ // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+ // t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2
+ // t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3
+ quantized_dimension:int;
+}
+
+// Sparse tensors.
+// We use a modification of the TACO format.
+// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+//
+// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+// potentially with a k-dimensional block (0 <= k <= n) with dims
+// (dn, ..., dn+k-1), the format needs to specify:
+// 1. In what order to traverse these dimensions. For example, to store a 2-D
+// matrix in row major order, the traversal order would be (d0, d1),
+// whereas to store it in column major order, the traversal order would be
+// (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+// could be (d0, d1, d2, d3).
+// 2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+// tensor dimension in (d0, ..., dn-1).
+// 3. In the traversal order defined above, the format (dense vs. sparse) and
+// index metadata for each dimension. For a dense dimension, this is just
+// the size of that dimension. For a sparse dimension, it's the same as
+// the compressed index defined in the Compressed Sparse Row (CSR) format.
+// (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+
+// The storage type for a dimension. Currently we support:
+// 1. DENSE: each coordinate in this dimension is stored implicitly.
+// 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+ // compression technique is the same as the one CSR uses.
+// More types like a sparse dimension with a different compression technique
+// could be added to the list in the future.
+enum DimensionType : byte {
+ DENSE = 0,
+ SPARSE_CSR = 1,
+}
+
+table Int32Vector {
+ values:[int];
+}
+
+table Uint16Vector {
+ values:[ushort] (force_align: 4);
+}
+
+table Uint8Vector {
+ values:[ubyte] (force_align: 4);
+}
+
+// Variable-typed buffer to store the index metadata for a sparse dimension.
+// The widest type is Int32 instead of UInt32 because a tensor's shape is an int32
+// vector. We don't want the per-dimensional index to overflow that range.
+union SparseIndexVector {
+ Int32Vector,
+ Uint16Vector,
+ Uint8Vector
+}
+
+table DimensionMetadata {
+ // Whether a dimension is dense or sparse.
+ format:DimensionType;
+ // Index metadata used for a dimension.
+ // - If format is DimensionType.DENSE then we use the dense_size field to
+ // store the size of that dimension. Each index in that dimension is
+ // stored implicitly.
+ // - If format is DimensionType.SPARSE_CSR then we use array_segments and
+ // array_indices to encode that dimension. array_segments represents how
+ // to segment the indices array, each segment corresponds to one element
+ // in the previous dimension. array_indices represents the index of the
+ // non-zero elements within this dimension (as those in the CSR matrix
+ // format, where the first array is row pointers and the second array is
+ // column indices).
+ dense_size:int;
+ array_segments:SparseIndexVector;
+ array_indices:SparseIndexVector;
+}
+
+// Parameters to encode a sparse TfLite tensor.
+table SparsityParameters {
+ // The traversal order of the dimensions defined in the `shape` field of the
+ // conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1,
+ // ..., dn-1),
+ // - if not block sparse, the traversal_order is just a permutation of (d0,
+ // ..., dn-1). For example, a 2-D matrix stored in row-major order would
+ // have traversal_order = (d0, d1).
+ // - if block sparse with a k-dimensional block (0 <= k <= n), the
+ // traversal_order has n + k elements. The first n elements are still a
+ // permutation of (d0, ..., dn-1). The last k elements are a permutation
+ // of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+ // example, a 2-D matrix with 2-D blocks, both stored in row-major order
+ // would have traversal_order = (d0, d1, d2, d3).
+ traversal_order:[int];
+ // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+ // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+ // tensor dimension in (d0, ..., dn-1).
+ // It's stored in the order of (dn, ..., dn+k-1).
+ // If not block-sparse, this field is NULL.
+ block_map:[int];
+ // In the traversal order defined above, the metadata needed for
+ // each dimension to locate the non-zero values in the original dense tensor.
+ // The size of the dim_metadata array = the size of the traversal_order array
+ // = n + k.
+ dim_metadata:[DimensionMetadata];
+}
+
+table Tensor {
+ // The tensor shape. The meaning of each entry is operator-specific but
+ // builtin ops use: [batch size, height, width, number of channels] (That's
+ // Tensorflow's NHWC).
+ shape:[int];
+ type:TensorType;
+ // An index that refers to the buffers table at the root of the model. Or,
+ // if there is no data buffer associated (i.e. intermediate results), then
+ // this is 0 (which refers to an always existent empty buffer).
+ //
+ // The data_buffer itself is an opaque container, with the assumption that the
+ // target device is little-endian. In addition, all builtin operators assume
+ // the memory is ordered such that if `shape` is [4, 3, 2], then index
+ // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+ buffer:uint;
+ name:string; // For debugging and importing back into tensorflow.
+ quantization:QuantizationParameters; // Optional.
+
+ is_variable:bool = false;
+
+ // Parameters to encode a sparse tensor. See the example in
+ // tensorflow/lite/testdata/sparse_tensor.json.
+ sparsity:SparsityParameters; // Optional.
+
+ // Encodes `shape` with unknown dimensions. Unknown dimensions are
+ // represented with -1.
+ shape_signature:[int]; // Optional.
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+// LINT.IfChange
+enum BuiltinOperator : int32 {
+ ADD = 0,
+ AVERAGE_POOL_2D = 1,
+ CONCATENATION = 2,
+ CONV_2D = 3,
+ DEPTHWISE_CONV_2D = 4,
+ DEPTH_TO_SPACE = 5,
+ DEQUANTIZE = 6,
+ EMBEDDING_LOOKUP = 7,
+ FLOOR = 8,
+ FULLY_CONNECTED = 9,
+ HASHTABLE_LOOKUP = 10,
+ L2_NORMALIZATION = 11,
+ L2_POOL_2D = 12,
+ LOCAL_RESPONSE_NORMALIZATION = 13,
+ LOGISTIC = 14,
+ LSH_PROJECTION = 15,
+ LSTM = 16,
+ MAX_POOL_2D = 17,
+ MUL = 18,
+ RELU = 19,
+ // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+ // since different model developers use RELU1 in different ways. Never
+ // create another op called RELU1.
+ RELU_N1_TO_1 = 20,
+ RELU6 = 21,
+ RESHAPE = 22,
+ RESIZE_BILINEAR = 23,
+ RNN = 24,
+ SOFTMAX = 25,
+ SPACE_TO_DEPTH = 26,
+ SVDF = 27,
+ TANH = 28,
+ CONCAT_EMBEDDINGS = 29,
+ SKIP_GRAM = 30,
+ CALL = 31,
+ CUSTOM = 32,
+ EMBEDDING_LOOKUP_SPARSE = 33,
+ PAD = 34,
+ UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+ GATHER = 36,
+ BATCH_TO_SPACE_ND = 37,
+ SPACE_TO_BATCH_ND = 38,
+ TRANSPOSE = 39,
+ MEAN = 40,
+ SUB = 41,
+ DIV = 42,
+ SQUEEZE = 43,
+ UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+ STRIDED_SLICE = 45,
+ BIDIRECTIONAL_SEQUENCE_RNN = 46,
+ EXP = 47,
+ TOPK_V2 = 48,
+ SPLIT = 49,
+ LOG_SOFTMAX = 50,
+ // DELEGATE is a special op type for the operations which are delegated to
+ // other backends.
+ // WARNING: Experimental interface, subject to change
+ DELEGATE = 51,
+ BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+ CAST = 53,
+ PRELU = 54,
+ MAXIMUM = 55,
+ ARG_MAX = 56,
+ MINIMUM = 57,
+ LESS = 58,
+ NEG = 59,
+ PADV2 = 60,
+ GREATER = 61,
+ GREATER_EQUAL = 62,
+ LESS_EQUAL = 63,
+ SELECT = 64,
+ SLICE = 65,
+ SIN = 66,
+ TRANSPOSE_CONV = 67,
+ SPARSE_TO_DENSE = 68,
+ TILE = 69,
+ EXPAND_DIMS = 70,
+ EQUAL = 71,
+ NOT_EQUAL = 72,
+ LOG = 73,
+ SUM = 74,
+ SQRT = 75,
+ RSQRT = 76,
+ SHAPE = 77,
+ POW = 78,
+ ARG_MIN = 79,
+ FAKE_QUANT = 80,
+ REDUCE_PROD = 81,
+ REDUCE_MAX = 82,
+ PACK = 83,
+ LOGICAL_OR = 84,
+ ONE_HOT = 85,
+ LOGICAL_AND = 86,
+ LOGICAL_NOT = 87,
+ UNPACK = 88,
+ REDUCE_MIN = 89,
+ FLOOR_DIV = 90,
+ REDUCE_ANY = 91,
+ SQUARE = 92,
+ ZEROS_LIKE = 93,
+ FILL = 94,
+ FLOOR_MOD = 95,
+ RANGE = 96,
+ RESIZE_NEAREST_NEIGHBOR = 97,
+ LEAKY_RELU = 98,
+ SQUARED_DIFFERENCE = 99,
+ MIRROR_PAD = 100,
+ ABS = 101,
+ SPLIT_V = 102,
+ UNIQUE = 103,
+ CEIL = 104,
+ REVERSE_V2 = 105,
+ ADD_N = 106,
+ GATHER_ND = 107,
+ COS = 108,
+ WHERE = 109,
+ RANK = 110,
+ ELU = 111,
+ REVERSE_SEQUENCE = 112,
+ MATRIX_DIAG = 113,
+ QUANTIZE = 114,
+ MATRIX_SET_DIAG = 115,
+ ROUND = 116,
+ HARD_SWISH = 117,
+ IF = 118,
+ WHILE = 119,
+ NON_MAX_SUPPRESSION_V4 = 120,
+ NON_MAX_SUPPRESSION_V5 = 121,
+ SCATTER_ND = 122,
+ SELECT_V2 = 123,
+ DENSIFY = 124,
+ SEGMENT_SUM = 125,
+ BATCH_MATMUL = 126,
+ PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+ CUMSUM = 128,
+ CALL_ONCE = 129,
+ BROADCAST_TO = 130,
+ RFFT2D = 131,
+ CONV_3D = 132,
+ IMAG=133,
+ REAL=134,
+ COMPLEX_ABS=135,
+ HASHTABLE = 136,
+ HASHTABLE_FIND = 137,
+ HASHTABLE_IMPORT = 138,
+ HASHTABLE_SIZE = 139,
+ REDUCE_ALL = 140,
+ CONV_3D_TRANSPOSE = 141,
+ VAR_HANDLE = 142,
+ READ_VARIABLE = 143,
+ ASSIGN_VARIABLE = 144,
+}
+// LINT.ThenChange(nnapi_linter/linter.proto)
+
+// Options for the builtin operators.
+union BuiltinOptions {
+ Conv2DOptions,
+ DepthwiseConv2DOptions,
+ ConcatEmbeddingsOptions,
+ LSHProjectionOptions,
+ Pool2DOptions,
+ SVDFOptions,
+ RNNOptions,
+ FullyConnectedOptions,
+ SoftmaxOptions,
+ ConcatenationOptions,
+ AddOptions,
+ L2NormOptions,
+ LocalResponseNormalizationOptions,
+ LSTMOptions,
+ ResizeBilinearOptions,
+ CallOptions,
+ ReshapeOptions,
+ SkipGramOptions,
+ SpaceToDepthOptions,
+ EmbeddingLookupSparseOptions,
+ MulOptions,
+ PadOptions,
+ GatherOptions,
+ BatchToSpaceNDOptions,
+ SpaceToBatchNDOptions,
+ TransposeOptions,
+ ReducerOptions,
+ SubOptions,
+ DivOptions,
+ SqueezeOptions,
+ SequenceRNNOptions,
+ StridedSliceOptions,
+ ExpOptions,
+ TopKV2Options,
+ SplitOptions,
+ LogSoftmaxOptions,
+ CastOptions,
+ DequantizeOptions,
+ MaximumMinimumOptions,
+ ArgMaxOptions,
+ LessOptions,
+ NegOptions,
+ PadV2Options,
+ GreaterOptions,
+ GreaterEqualOptions,
+ LessEqualOptions,
+ SelectOptions,
+ SliceOptions,
+ TransposeConvOptions,
+ SparseToDenseOptions,
+ TileOptions,
+ ExpandDimsOptions,
+ EqualOptions,
+ NotEqualOptions,
+ ShapeOptions,
+ PowOptions,
+ ArgMinOptions,
+ FakeQuantOptions,
+ PackOptions,
+ LogicalOrOptions,
+ OneHotOptions,
+ LogicalAndOptions,
+ LogicalNotOptions,
+ UnpackOptions,
+ FloorDivOptions,
+ SquareOptions,
+ ZerosLikeOptions,
+ FillOptions,
+ BidirectionalSequenceLSTMOptions,
+ BidirectionalSequenceRNNOptions,
+ UnidirectionalSequenceLSTMOptions,
+ FloorModOptions,
+ RangeOptions,
+ ResizeNearestNeighborOptions,
+ LeakyReluOptions,
+ SquaredDifferenceOptions,
+ MirrorPadOptions,
+ AbsOptions,
+ SplitVOptions,
+ UniqueOptions,
+ ReverseV2Options,
+ AddNOptions,
+ GatherNdOptions,
+ CosOptions,
+ WhereOptions,
+ RankOptions,
+ ReverseSequenceOptions,
+ MatrixDiagOptions,
+ QuantizeOptions,
+ MatrixSetDiagOptions,
+ HardSwishOptions,
+ IfOptions,
+ WhileOptions,
+ DepthToSpaceOptions,
+ NonMaxSuppressionV4Options,
+ NonMaxSuppressionV5Options,
+ ScatterNdOptions,
+ SelectV2Options,
+ DensifyOptions,
+ SegmentSumOptions,
+ BatchMatMulOptions,
+ CumsumOptions,
+ CallOnceOptions,
+ BroadcastToOptions,
+ Rfft2dOptions,
+ Conv3DOptions,
+ HashtableOptions,
+ HashtableFindOptions,
+ HashtableImportOptions,
+ HashtableSizeOptions,
+ VarHandleOptions,
+ ReadVariableOptions,
+ AssignVariableOptions,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+ NONE = 0,
+ RELU = 1,
+ RELU_N1_TO_1 = 2,
+ RELU6 = 3,
+ TANH = 4,
+ SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+// Options for both Conv3D and Conv3DTranspose.
+table Conv3DOptions {
+ padding:Padding;
+ stride_d:int;
+ stride_w:int;
+ stride_h:int;
+ fused_activation_function:ActivationFunctionType;
+ dilation_d_factor:int = 1;
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ filter_width:int;
+ filter_height:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+ // Parameters for DepthwiseConv version 1 or above.
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+ // `depth_multiplier` is redundant. It's used by CPU kernels in
+ // TensorFlow 2.0 or below, but ignored in versions above.
+ // See comments in lite/c/builtin_op_data.h for more details.
+ depth_multiplier:int;
+ fused_activation_function:ActivationFunctionType;
+ // Parameters for DepthwiseConv version 2 or above.
+ dilation_w_factor:int = 1;
+ dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+ num_channels:int;
+ num_columns_per_channel:[int];
+ embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+ UNKNOWN = 0,
+ SPARSE = 1,
+ DENSE = 2,
+}
+
+table LSHProjectionOptions {
+ type: LSHProjectionType;
+}
+
+table SVDFOptions {
+ rank:int;
+ fused_activation_function:ActivationFunctionType;
+ // For weights-only quantization, use asymmetric quantization for non
+ // constant inputs at evaluation time.
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+ time_major:bool;
+ fused_activation_function:ActivationFunctionType;
+ merge_outputs: bool;
+ asymmetric_quantize_inputs:bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+ DEFAULT = 0,
+ SHUFFLED4x16INT8 = 1,
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+ // Parameters for FullyConnected version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+
+ // Parameters for FullyConnected version 2 or above.
+ weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+ // Parameters for FullyConnected version 5 or above.
+ // If set to true, then the number of dimensions is preserved. Furthermore,
+ // all but the last dimension of the input and output shapes will be equal.
+ keep_num_dims: bool;
+
+ // Parameters for FullyConnected version 7 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table SoftmaxOptions {
+ beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+ axis:int;
+ fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 3.
+ pot_scale_int16:bool = true;
+}
+
+table MulOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+ // This field is currently ignored in the L2 Norm Op.
+ fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+ radius:int;
+ bias:float;
+ alpha:float;
+ beta:float;
+}
+
+enum LSTMKernelType : byte {
+ // Full LSTM kernel which supports peephole and projection.
+ FULL = 0,
+ // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+ BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+ // Parameters for LSTM version 1 or above.
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // Parameters for LSTM version 2 or above.
+ // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL;
+
+ // Parameters for LSTM version 4 or above.
+ asymmetric_quantize_inputs: bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true then first dimension is sequence, otherwise batch.
+ time_major:bool;
+
+ // Parameter for Unidirectional Sequence LSTM version 4.
+ asymmetric_quantize_inputs:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+ // Parameters supported by version 1:
+ fused_activation_function:ActivationFunctionType;
+ cell_clip: float; // Optional, 0.0 means no clipping
+ proj_clip: float; // Optional, 0.0 means no clipping
+
+ // If true, store the outputs of both directions into the first output.
+ merge_outputs: bool;
+
+ // Parameters supported by version 2:
+ // If true then first dimension is sequence, otherwise batch.
+ // Version 1 implementations assumed time_major to be true, so this default
+ // value should never change.
+ time_major: bool = true;
+
+ // Parameters for version 3 or above.
+ asymmetric_quantize_inputs:bool;
+}
+
+table ResizeBilinearOptions {
+ new_height: int (deprecated);
+ new_width: int (deprecated);
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+table ResizeNearestNeighborOptions {
+ align_corners: bool;
+ half_pixel_centers: bool;
+}
+
+// A call operation options
+table CallOptions {
+ // The subgraph index that needs to be called.
+ subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+ new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+ ngram_size: int;
+ max_skip_size: int;
+ include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+ block_size: int;
+}
+
+table DepthToSpaceOptions {
+ block_size: int;
+}
+
+table SubOptions {
+ fused_activation_function:ActivationFunctionType;
+ // Parameters supported by version 5
+ pot_scale_int16:bool = true;
+}
+
+table DivOptions {
+ fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+ SUM = 0,
+ MEAN = 1,
+ SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+ combiner:CombinerType;
+}
+
+table GatherOptions {
+ axis: int;
+ // Parameters for Gather version 5 or above.
+ batch_dims: int = 0;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+ keep_dims: bool;
+}
+
+table SqueezeOptions {
+ squeeze_dims:[int];
+}
+
+table SplitOptions {
+ num_splits: int;
+}
+
+table SplitVOptions {
+ num_splits: int;
+}
+
+table StridedSliceOptions {
+ begin_mask: int;
+ end_mask: int;
+ ellipsis_mask: int;
+ new_axis_mask: int;
+ shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+ in_data_type: TensorType;
+ out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+ output_type : TensorType;
+}
+
+table ArgMinOptions {
+ output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+ padding:Padding;
+ stride_w:int;
+ stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+ validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+ // Optional output type of the operation (int32 or int64). Defaults to int32.
+ out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+ // Parameters supported by version 1:
+ min:float;
+ max:float;
+ num_bits:int;
+
+ // Parameters supported by version 2:
+ narrow_range:bool;
+}
+
+table PackOptions {
+ values_count:int;
+ axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+ axis:int;
+}
+
+table AbsOptions {
+}
+
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+ num:int;
+ axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+ alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+ // Doesn't include borders.
+ REFLECT = 0,
+ // Includes borders.
+ SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+ mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+ idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+ seq_dim:int;
+ batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+ then_subgraph_index:int;
+ else_subgraph_index:int;
+}
+
+table CallOnceOptions {
+ init_subgraph_index:int;
+}
+
+table WhileOptions {
+ cond_subgraph_index:int;
+ body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+table SelectV2Options {
+}
+
+table DensifyOptions {
+}
+
+table SegmentSumOptions {
+}
+
+table BatchMatMulOptions {
+ adj_x:bool;
+ adj_y:bool;
+ // Parameters for BatchMatMul version 4 or above.
+ // If set to true, then weights-only op will use asymmetric quantization for
+ // inputs.
+ asymmetric_quantize_inputs: bool;
+}
+
+table CumsumOptions {
+ exclusive:bool;
+ reverse:bool;
+}
+
+table BroadcastToOptions {
+}
+
+table Rfft2dOptions {
+}
+
+table HashtableOptions {
+ // The identity of hash tables. This identity will be used across different
+ // subgraphs in the same interpreter instance.
+ table_id:int;
+ key_dtype:TensorType;
+ value_dtype:TensorType;
+}
+
+table HashtableFindOptions {
+}
+
+table HashtableImportOptions {
+}
+
+table HashtableSizeOptions {
+}
+
+table VarHandleOptions {
+ container:string;
+ shared_name:string;
+}
+
+table ReadVariableOptions {
+}
+
+table AssignVariableOptions {
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+ // This field is for backward compatibility. This field will be used when
+ // the value of the extended builtin_code field is less than
+ // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ deprecated_builtin_code:byte;
+ custom_code:string;
+
+ // The version of the operator. The version needs to be bumped whenever new
+ // parameters are introduced into an op.
+ version:int = 1;
+
+ // This field is introduced for resolving the op builtin code shortage problem
+ // (the original BuiltinOperator enum field was represented as a byte).
+ // This field will be used when the value of the extended builtin_code field
+ // is greater than BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+ builtin_code:BuiltinOperator;
+}
+
+enum CustomOptionsFormat : byte {
+ FLEXBUFFERS = 0,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operation are configured using builtin_options
+// or custom_options.
+table Operator {
+ // Index into the operator_codes array. Using an integer here avoids
+ // complicated map lookups.
+ opcode_index:uint;
+
+ // Optional input are indicated by -1.
+ inputs:[int];
+ outputs:[int];
+
+ builtin_options:BuiltinOptions;
+ custom_options:[ubyte];
+ custom_options_format:CustomOptionsFormat;
+
+ // A list of booleans indicating the input tensors which are being mutated by
+ // this operator (e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and
+ // fifth are mutable variables, then this list will contain
+ // [false, true, false, false, true].
+ //
+ // If the list is empty, no variable is mutated in this operator.
+ // The list either has the same length as `inputs`, or is empty.
+ mutating_variable_inputs:[bool];
+
+ // A list of indices to the subgraph's "tensors" that are internal to an Op.
+ // Internal tensors are those that do not flow in or out of the operation,
+ // but instead are part of internal computation. As such, the operation's
+ // implementation may manage its memory more efficiently. They are needed
+ // however (i.e. not just an implementation detail) since they are part of the
+ // computation, which may require relevant metadata such as quantization
+ // parameters.
+ intermediates:[int];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+ // A list of all tensors used in this subgraph.
+ tensors:[Tensor];
+
+ // Indices of the tensors that are inputs into this subgraph. Note this is
+ // the list of non-static tensors that feed into the subgraph for inference.
+ inputs:[int];
+
+ // Indices of the tensors that are outputs out of this subgraph. Note this is
+ // the list of output tensors that are considered the product of the
+ // subgraph's inference.
+ outputs:[int];
+
+ // All operators, in execution order.
+ operators:[Operator];
+
+ // Name of this subgraph (used for debugging).
+ name:string;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+ data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+ // A human readable string to uniquely identify a Metadata.
+ name:string;
+ // An index to the buffers table.
+ buffer:uint;
+}
+
+// Map from an alias name of tensor to tensor index in the graph.
+// This is used in Signature def.
+table TensorMap {
+ // Represents the alias to use for this tensor.
+ name:string;
+
+ // The actual tensor index in the primary graph, that 'name' corresponds to.
+ tensor_index:uint;
+}
+
+// This corresponds to SignatureDef in Tensorflow SavedModel.
+// The SignatureDef will be part of the SavedModel provided for conversion.
+table SignatureDef {
+ // Named inputs for this signature.
+ inputs:[TensorMap];
+
+ // Named outputs for this signature.
+ outputs:[TensorMap];
+
+ // Exported method name for this signature.
+ method_name:string;
+
+ // Key value which was in the Tensorflow SavedModel SignatureDef map.
+ key:string;
+
+ // Subgraph index of the exported method.
+ subgraph_index:uint;
+}
+
+table Model {
+ // Version of the schema.
+ version:uint;
+
+ // A list of all operator codes used in this model. This is
+ // kept in order because operators carry an index into this
+ // vector.
+ operator_codes:[OperatorCode];
+
+ // All the subgraphs of the model. The 0th is assumed to be the main
+ // model.
+ subgraphs:[SubGraph];
+
+ // A description of the model.
+ description:string;
+
+ // Buffers of the model.
+ // Note the 0th entry of this array must be an empty buffer (sentinel).
+ // This is a convention so that tensors without a buffer can provide 0 as
+ // their buffer.
+ buffers:[Buffer];
+
+ // Metadata about the model. Indirects into the existing buffers list.
+ // Deprecated, prefer to use metadata field.
+ metadata_buffer:[int];
+
+ // Metadata about the model.
+ metadata:[Metadata];
+
+ // Optional SignatureDefs for the model.
+ signature_defs:[SignatureDef];
+}
+
+root_type Model;
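The QuantizationParameters table above defines asymmetric linear quantization as f = scale * (q - zero_point), with per-channel parameters selected by quantized_dimension. A minimal illustrative sketch of that convention in C++ (not part of the schema or this patch; the buffer is assumed to be flattened with the quantized dimension innermost):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Dequantize per-channel int8 data following f = scale * (q - zero_point).
// `channels` is the extent of the quantized_dimension, assumed here to be the
// innermost (fastest-varying) dimension of the flattened buffer.
std::vector<float> dequantize_per_channel(const std::vector<int8_t> &q,
                                          const std::vector<float> &scale,
                                          const std::vector<int64_t> &zero_point,
                                          size_t channels)
{
  std::vector<float> f(q.size());
  for (size_t i = 0; i < q.size(); ++i)
  {
    const size_t c = i % channels; // channel index of element i
    f[i] = scale[c] * static_cast<float>(q[i] - zero_point[c]);
  }
  return f;
}
```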
diff --git a/res/TensorFlowLiteSchema/SCHEMA.lst b/res/TensorFlowLiteSchema/SCHEMA.lst
index 73dfacd7b..609ef4b0b 100644
--- a/res/TensorFlowLiteSchema/SCHEMA.lst
+++ b/res/TensorFlowLiteSchema/SCHEMA.lst
@@ -6,3 +6,4 @@ VERSION,URL
2.2.0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.2.0/tensorflow/lite/schema/schema.fbs
2.3.0-rc0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.3.0-rc0/tensorflow/lite/schema/schema.fbs
2.3.0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.3.0/tensorflow/lite/schema/schema.fbs
+2.6.0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.6.0/tensorflow/lite/schema/schema.fbs
diff --git a/runtime/contrib/android/api/build.gradle b/runtime/contrib/android/api/build.gradle
index 2e3955c3a..b432929b5 100644
--- a/runtime/contrib/android/api/build.gradle
+++ b/runtime/contrib/android/api/build.gradle
@@ -8,7 +8,7 @@ android {
minSdkVersion 26
targetSdkVersion 29
versionCode 1
- versionName "1.17.0"
+ versionName "1.18.0"
externalNativeBuild {
ndkBuild {
diff --git a/runtime/libs/ndarray/CMakeLists.txt b/runtime/libs/ndarray/CMakeLists.txt
new file mode 100644
index 000000000..f88f13186
--- /dev/null
+++ b/runtime/libs/ndarray/CMakeLists.txt
@@ -0,0 +1,23 @@
+add_library(ndarray STATIC src/Array.cpp src/ContiguousSpan.cpp)
+
+set_target_properties(ndarray PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+target_include_directories(ndarray PUBLIC include)
+#can't make this private because of c++ templates
+target_include_directories(ndarray PUBLIC src)
+
+option(NDARRAY_INLINE_TEMPLATES "Set to ON to disable extern declarations for common types")
+
+if(${NDARRAY_INLINE_TEMPLATES})
+ target_compile_definitions(ndarray PUBLIC -DNDARRAY_INLINE_TEMPLATES=1)
+endif()
+
+target_link_libraries(ndarray PRIVATE nnfw_common)
+target_link_libraries(ndarray PRIVATE nnfw_coverage)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+add_subdirectory(test)
+add_subdirectory(example)
diff --git a/runtime/libs/ndarray/example/CMakeLists.txt b/runtime/libs/ndarray/example/CMakeLists.txt
new file mode 100644
index 000000000..c4b575dad
--- /dev/null
+++ b/runtime/libs/ndarray/example/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_executable(example_no_array example_no_array.cpp)
+
+add_executable(example_array example_array.cpp)
+target_link_libraries(example_array PRIVATE ndarray)
diff --git a/runtime/libs/ndarray/example/example_array.cpp b/runtime/libs/ndarray/example/example_array.cpp
new file mode 100644
index 000000000..85d274681
--- /dev/null
+++ b/runtime/libs/ndarray/example/example_array.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ndarray/Array.h"
+
+#include <iostream>
+#include <iterator>
+
+using namespace ndarray;
+
+void gather_array(const Array<float> &input, Array<float> &output, const Array<int> &indices)
+{
+ assert(indices.shape().rank() == 3);
+ assert(input.shape().rank() == 3);
+ assert(indices.shape().dim(1) == input.shape().rank());
+
+ for (size_t i = 0; i < indices.shape().dim(0); ++i)
+ {
+ for (size_t j = 0; j < indices.shape().dim(1); ++j)
+ {
+ auto index = indices.slice(i, j);
+ output.slice(i, j).assign(input.slice(index[0], index[1]));
+ }
+ }
+}
+
+int main()
+{
+ // fill tensor of shape[3,3,4] with sequential numbers from [0..36)
+ Shape in_shape{3, 3, 4};
+ std::vector<float> input_data(in_shape.element_count());
+ for (size_t i = 0; i < in_shape.element_count(); ++i)
+ input_data[i] = i;
+
+ Array<float> input(input_data.data(), in_shape);
+
+ // select column-vectors on main diagonal
+ Shape indices_shape{1, 3, 2};
+ std::vector<int> indices_data(indices_shape.element_count());
+ Array<int> indices(indices_data.data(), indices_shape);
+
+ indices.slice(0, 0) = {0, 0};
+ indices.slice(0, 1) = {1, 1};
+ indices.slice(0, 2) = {2, 2};
+
+ Shape output_shape{1, 3, 4};
+ std::vector<float> output_data(output_shape.element_count());
+
+ Array<float> output(output_data.data(), output_shape);
+
+ gather_array(input, output, indices);
+
+ for (size_t i = 0; i < indices_shape.dim(0); ++i)
+ {
+ for (size_t j = 0; j < indices_shape.dim(1); ++j)
+ {
+ auto output_piece = output.slice(i, j);
+ std::ostream_iterator<int> cout_it(std::cout, ", ");
+ std::copy(output_piece.begin(), output_piece.end(), cout_it);
+ std::cout << std::endl;
+ }
+ }
+}
diff --git a/runtime/libs/ndarray/example/example_no_array.cpp b/runtime/libs/ndarray/example/example_no_array.cpp
new file mode 100644
index 000000000..3a4d05dca
--- /dev/null
+++ b/runtime/libs/ndarray/example/example_no_array.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <array>
+#include <vector>
+#include <algorithm>
+#include <cassert>
+#include <iostream>
+
+void gather_no_array(const float *in_data, const std::array<size_t, 3> &dims, float *out_data,
+ const std::array<size_t, 3> &out_dims, //[nselections,
+ const int *indices, const std::array<size_t, 3> &indices_dims)
+{
+ assert(indices_dims[1] == dims.size());
+
+ for (int i = 0; i < indices_dims[0]; ++i)
+ {
+ for (int j = 0; j < indices_dims[1]; ++j)
+ {
+ const int *index_ptr = indices + i * indices_dims[2] * indices_dims[1] + j * indices_dims[2];
+
+ size_t in_offset = index_ptr[0] * dims[2] * dims[1] + index_ptr[1] * dims[2];
+
+ const float *in_ptr = in_data + in_offset;
+
+ size_t out_offset = i * out_dims[2] * out_dims[1] + j * out_dims[2];
+
+ float *out_ptr = out_data + out_offset;
+
+ for (int k = 0; k < dims[2]; ++k)
+ {
+ out_ptr[k] = in_ptr[k];
+ }
+ }
+ }
+}
+
+int main()
+{
+ std::array<size_t, 3> in_dims{3, 3, 4};
+ std::vector<float> input(3 * 3 * 4);
+ for (size_t i = 0; i < 3 * 3 * 4; ++i)
+ input[i] = i;
+
+ std::array<size_t, 3> indices_shape{1, 3, 2};
+ std::vector<int> indices(1 * 3 * 2);
+
+ indices[0] = 0;
+ indices[1] = 0;
+ indices[2] = 1;
+ indices[3] = 1;
+ indices[4] = 2;
+ indices[5] = 2;
+
+ std::array<size_t, 3> output_dims{1, 3, 4};
+ std::vector<float> output(1 * 3 * 4);
+
+ gather_no_array(input.data(), in_dims, output.data(), output_dims, indices.data(), indices_shape);
+
+ for (size_t i = 0; i < output_dims[0]; ++i)
+ {
+ for (size_t j = 0; j < output_dims[1]; ++j)
+ {
+ auto out_ptr = output.data() + i * output_dims[1] * output_dims[2] + j * output_dims[2];
+ for (size_t k = 0; k < output_dims[2]; ++k)
+ {
+ std::cout << out_ptr[k] << ", ";
+ }
+ std::cout << std::endl;
+ }
+ }
+}
diff --git a/runtime/libs/ndarray/include/ndarray/Array.h b/runtime/libs/ndarray/include/ndarray/Array.h
new file mode 100644
index 000000000..09e791763
--- /dev/null
+++ b/runtime/libs/ndarray/include/ndarray/Array.h
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NDARRAY_ARRAY_H_
+#define _NDARRAY_ARRAY_H_
+
+#include "Common.h"
+
+#include "ContiguousSpan.h"
+#include "Shape.h"
+
+#if __cplusplus < 201402L
+#include "detail/cxx14.h" //integer_sequence and make_index_sequence definitions
+#else
+#include <utility>
+#endif
+
+#include <algorithm>
+#include <cassert>
+#include <type_traits>
+#include <array>
+#include <tuple>
+#include <cstddef>
+
+namespace ndarray
+{
+
+// there is no index_sequence before c++14
+#if __cplusplus < 201402L
+
+template <size_t... Nums> using index_sequence = cxx14::index_sequence<Nums...>;
+
+template <size_t Num> using make_index_sequence = cxx14::make_index_sequence<Num>;
+
+#else
+
+template <size_t... Nums> using index_sequence = std::index_sequence<Nums...>;
+
+template <size_t _Num> using make_index_sequence = std::make_index_sequence<_Num>;
+
+#endif //__cplusplus < 201402L
+
+struct Strides
+{
+ explicit Strides(Shape s) : _strides{} { fillStrides(s); }
+
+ int operator[](size_t idx) const noexcept { return _strides[idx]; }
+
+ // since we don't have c++17 fold expressions
+ template <typename Seq, typename... Ts> struct _calc_offset;
+
+ template <size_t Num, size_t... Nums, typename T, typename... Ts>
+ struct _calc_offset<index_sequence<Num, Nums...>, T, Ts...>
+ {
+ static constexpr size_t get(const std::array<int, 8> &strides, int x, Ts... xs)
+ {
+ return _calc_offset<index_sequence<Nums...>, Ts...>::get(strides, xs...) +
+ x * std::get<Num>(strides);
+ }
+ };
+
+ template <size_t Num, typename T> struct _calc_offset<index_sequence<Num>, T>
+ {
+ static constexpr size_t get(const std::array<int, 8> &strides, int x)
+ {
+ return x * std::get<Num>(strides);
+ }
+ };
+
+ template <typename Seq, typename... Ts> constexpr size_t offset(Seq, Ts... x) const noexcept
+ {
+ // return ( 0 + ... + (std::get<Nums>(_strides) * x)); in c++17
+ return _calc_offset<Seq, Ts...>::get(_strides, x...);
+ }
+
+private:
+ void fillStrides(const Shape &s) noexcept
+ {
+ int rank = s.rank();
+ _strides[rank - 1] = 1;
+ for (int d = rank - 2; d >= 0; --d)
+ {
+ _strides[d] = _strides[d + 1] * s.dim(d + 1);
+ }
+ }
+
+ std::array<int, NDARRAY_MAX_DIMENSION_COUNT> _strides;
+};
+
+template <typename T> class Array
+{
+public:
+ Array(T *data, Shape shape) noexcept : _data(data), _shape(shape), _strides(shape) {}
+
+ Array(const Array &) = delete;
+
+ Array(Array &&a) noexcept : _data(a._data), _shape(a._shape), _strides(a._strides)
+ {
+ a._data = nullptr;
+ }
+
+ template <typename... Ts> T &at(Ts... x) const noexcept { return _at(static_cast<size_t>(x)...); }
+
+ /**
+ * @brief returns last dimension as ContiguousSpan
+ * @param x indices of slice to take. See tests for usage details
+ * @return slice at given position
+ */
+ template <typename... Ts> ContiguousSpan<T, std::is_const<T>::value> slice(Ts... x) noexcept
+ {
+ assert(sizeof...(Ts) == _shape.rank() - 1);
+ return {&at(x..., 0ul), _shape.dim(_shape.rank() - 1)};
+ }
+
+ /**
+ * @brief returns last dimension as ContiguousSpan
+ * @param x indices of slice to take. See tests for usage details
+ * @return slice at given position
+ */
+ template <typename... Ts> ContiguousSpan<T, true> slice(Ts... x) const noexcept
+ {
+ assert(sizeof...(Ts) == _shape.rank() - 1);
+ return {&at(x..., 0ul), _shape.dim(_shape.rank() - 1)};
+ }
+
+ ContiguousSpan<T, std::is_const<T>::value> flat() noexcept
+ {
+ return {_data, _shape.element_count()};
+ }
+
+ ContiguousSpan<T, true> flat() const noexcept { return {_data, _shape.element_count()}; }
+
+ const Shape &shape() const noexcept { return _shape; }
+
+private:
+ template <typename... Ts> T &_at(Ts... x) const noexcept
+ {
+ assert(sizeof...(x) == _shape.rank());
+ using Indices = make_index_sequence<sizeof...(Ts)>;
+ return _data[offset(Indices{}, x...)];
+ }
+
+ template <typename... Ts, size_t... Nums>
+ size_t offset(index_sequence<Nums...> seq, Ts... x) const noexcept
+ {
+ static_assert(
+ sizeof...(Ts) == sizeof...(Nums),
+ "Sanity check failed. Generated index sequence size is not equal to argument count");
+
+ return _strides.offset(seq, x...);
+ }
+
+ T *_data;
+ Shape _shape;
+ Strides _strides;
+};
+
+template <typename To, typename From> Array<To> array_cast(Array<From> &&from, Shape newShape)
+{
+ assert(from.shape().element_count() / (sizeof(To) / sizeof(From)) == newShape.element_count());
+ return Array<To>(reinterpret_cast<To *>(from.flat().data()), newShape);
+}
+
+template <typename To, typename From>
+Array<const To> array_cast(const Array<From> &from, Shape newShape)
+{
+ assert(from.shape().element_count() / (sizeof(To) / sizeof(From)) == newShape.element_count());
+ return Array<const To>(reinterpret_cast<const To *>(from.flat().data()), newShape);
+}
+
+#ifndef NDARRAY_INLINE_TEMPLATES
+
+extern template class Array<float>;
+extern template class Array<int32_t>;
+extern template class Array<uint32_t>;
+extern template class Array<uint8_t>;
+
+#endif // NDARRAY_INLINE_TEMPLATES
+
+} // namespace ndarray
+
+#endif //_NDARRAY_ARRAY_H_
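Strides above fills a row-major stride table (the innermost dimension has stride 1; each earlier stride is the product of the dimensions after it), and Array::at() sums index * stride terms through the index_sequence expansion. The equivalent plain computation for a rank-3 shape, as a standalone sketch rather than code from this patch:

```cpp
#include <array>
#include <cassert>
#include <cstddef>

// Row-major offset for a rank-3 shape, mirroring what ndarray::Strides computes.
size_t row_major_offset(const std::array<size_t, 3> &dims, size_t i, size_t j, size_t k)
{
  const size_t strides[3] = {dims[1] * dims[2], dims[2], 1};
  return i * strides[0] + j * strides[1] + k * strides[2];
}

int main()
{
  const std::array<size_t, 3> dims{3, 3, 4};
  assert(row_major_offset(dims, 0, 0, 0) == 0);
  assert(row_major_offset(dims, 1, 2, 3) == 1 * 12 + 2 * 4 + 3); // element 23
  return 0;
}
```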
diff --git a/runtime/libs/ndarray/include/ndarray/Common.h b/runtime/libs/ndarray/include/ndarray/Common.h
new file mode 100644
index 000000000..aa0cc6fe2
--- /dev/null
+++ b/runtime/libs/ndarray/include/ndarray/Common.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NDARRAY_COMMON_H_
+#define _NDARRAY_COMMON_H_
+
+#define NDARRAY_MAX_DIMENSION_COUNT 8
+
+#endif //_NDARRAY_COMMON_H_
diff --git a/runtime/libs/ndarray/include/ndarray/ContiguousSpan.h b/runtime/libs/ndarray/include/ndarray/ContiguousSpan.h
new file mode 100644
index 000000000..b322b77db
--- /dev/null
+++ b/runtime/libs/ndarray/include/ndarray/ContiguousSpan.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NDARRAY_CONTIGUOUS_SPAN_H_
+#define _NDARRAY_CONTIGUOUS_SPAN_H_
+
+#include <type_traits>
+#include <vector>
+#include <cstdint>
+#include <cstddef>
+#include <cassert>
+
+namespace ndarray
+{
+
+template <typename T, bool isConst = false> class ContiguousSpan
+{
+public:
+ using pointer_type = typename std::conditional<isConst, const T *, T *>::type;
+ using reference_type = typename std::conditional<isConst, const T &, T &>::type;
+ using iterator_type = pointer_type;
+
+ ContiguousSpan(pointer_type data, size_t len) noexcept : _data(data), _len(len) {}
+
+ template <typename It>
+ explicit ContiguousSpan(It first, It last) noexcept
+ : _data(&*first), _len(std::distance(first, last))
+ {
+ }
+
+ ContiguousSpan(const ContiguousSpan &) = delete;
+
+ ContiguousSpan(ContiguousSpan &&s) noexcept : _data(s._data), _len(s._len) { s._data = nullptr; }
+
+ operator ContiguousSpan<T, true>() { return ContiguousSpan<T, true>{_data, _len}; }
+
+ reference_type operator[](size_t idx) const noexcept { return _data[idx]; }
+
+ reference_type at(size_t idx) const noexcept { return _data[idx]; }
+
+ ContiguousSpan<T, isConst> offset(size_t offset)
+ {
+ assert(offset <= _len);
+ return {_data + offset, _len - offset};
+ }
+
+ template <typename From, bool _ = isConst>
+ typename std::enable_if<!_, void>::type assign(const From &f) noexcept
+ {
+ assignFrom(std::begin(f), std::end(f));
+ }
+
+ template <typename U, bool _ = isConst>
+ typename std::enable_if<!_, ContiguousSpan &>::type
+ operator=(std::initializer_list<U> list) noexcept
+ {
+ assignFrom(std::begin(list), std::end(list));
+ return *this;
+ }
+
+ template <typename It, bool _ = isConst>
+ typename std::enable_if<!_, void>::type assignFrom(It first, It last) noexcept
+ {
+ std::copy(first, last, begin());
+ }
+
+ size_t size() const { return _len; }
+
+ iterator_type begin() const { return iterator_type{_data}; }
+
+ iterator_type end() const { return iterator_type{_data + _len}; }
+
+ pointer_type data() { return _data; }
+
+private:
+ pointer_type _data;
+ size_t _len;
+};
+
+#ifndef NDARRAY_INLINE_TEMPLATES
+
+extern template class ContiguousSpan<float, true>;
+extern template class ContiguousSpan<float, false>;
+extern template class ContiguousSpan<int32_t, true>;
+extern template class ContiguousSpan<int32_t, false>;
+extern template class ContiguousSpan<uint32_t, true>;
+extern template class ContiguousSpan<uint32_t, false>;
+extern template class ContiguousSpan<uint8_t, true>;
+extern template class ContiguousSpan<uint8_t, false>;
+
+#endif // NDARRAY_INLINE_TEMPLATES
+
+} // namespace ndarray
+
+#endif //_NDARRAY_CONTIGUOUS_SPAN_H_
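ContiguousSpan is a non-owning view over a contiguous buffer: the isConst parameter picks const or mutable pointer and reference types, assign() and operator= write through the view, and offset(n) yields a sub-view starting at element n. A usage sketch, assuming the header is reachable as ndarray/ContiguousSpan.h as in the examples added by this patch:

```cpp
#include <cassert>
#include <vector>

#include "ndarray/ContiguousSpan.h"

int main()
{
  std::vector<float> src{1, 2, 3, 4};
  std::vector<float> dst(4);

  ndarray::ContiguousSpan<float> in(src.begin(), src.end());  // writable view over src
  ndarray::ContiguousSpan<float> out(dst.begin(), dst.end()); // writable view over dst

  out.assign(in); // copies all four elements into dst
  assert(dst[3] == 4.0f);

  auto tail = out.offset(2); // sub-view over the last two elements
  tail = {30, 40};           // writes through to dst[2] and dst[3]
  assert(dst[2] == 30.0f && dst[3] == 40.0f);
  return 0;
}
```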
diff --git a/runtime/libs/ndarray/include/ndarray/Shape.h b/runtime/libs/ndarray/include/ndarray/Shape.h
new file mode 100644
index 000000000..fa58613b8
--- /dev/null
+++ b/runtime/libs/ndarray/include/ndarray/Shape.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NDARRAY_SHAPE_H_
+#define _NDARRAY_SHAPE_H_
+
+#include "Common.h"
+
+#include <array>
+#include <cassert>
+#include <cstddef>
+
+namespace ndarray
+{
+
+class Shape
+{
+public:
+ //_dims{} here and later since array does not have std::initializer_list ctor
+ // and aggregate initialization is not allowed here
+ explicit Shape(size_t rank) noexcept : _dims{}, _rank(rank)
+ {
+ std::fill(_dims.begin(), _dims.end(), 0);
+ }
+
+ Shape(std::initializer_list<size_t> list) noexcept : _dims{}, _rank(list.size())
+ {
+ std::copy(list.begin(), list.end(), _dims.begin());
+ }
+
+ size_t dim(int i) const noexcept { return _dims.at(i); }
+
+ size_t &dim(int i) noexcept { return _dims.at(i); }
+
+ size_t element_count() const noexcept
+ {
+ uint32_t res = 1;
+ for (size_t i = 0; i < rank(); ++i)
+ res *= dim(i);
+ assert(res <= 0xffffffff);
+ return res;
+ }
+
+ size_t rank() const noexcept { return _rank; }
+
+private:
+ std::array<size_t, NDARRAY_MAX_DIMENSION_COUNT> _dims;
+ size_t _rank;
+};
+
+} // namespace ndarray
+
+#endif //_NDARRAY_SHAPE_H_
diff --git a/runtime/libs/ndarray/src/Array.cpp b/runtime/libs/ndarray/src/Array.cpp
new file mode 100644
index 000000000..f9c9de9d3
--- /dev/null
+++ b/runtime/libs/ndarray/src/Array.cpp
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ndarray/Array.h"
+
+namespace ndarray
+{
+
+template class Array<float>;
+template class Array<int32_t>;
+template class Array<uint32_t>;
+template class Array<uint8_t>;
+
+} // namespace ndarray
diff --git a/runtime/libs/ndarray/src/ContiguousSpan.cpp b/runtime/libs/ndarray/src/ContiguousSpan.cpp
new file mode 100644
index 000000000..e06cfc2a1
--- /dev/null
+++ b/runtime/libs/ndarray/src/ContiguousSpan.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ndarray/ContiguousSpan.h"
+
+namespace ndarray
+{
+
+template class ContiguousSpan<float, true>;
+template class ContiguousSpan<float, false>;
+template class ContiguousSpan<int32_t, true>;
+template class ContiguousSpan<int32_t, false>;
+template class ContiguousSpan<uint32_t, true>;
+template class ContiguousSpan<uint32_t, false>;
+template class ContiguousSpan<uint8_t, true>;
+template class ContiguousSpan<uint8_t, false>;
+
+} // namespace ndarray
diff --git a/runtime/libs/ndarray/src/detail/cxx14.h b/runtime/libs/ndarray/src/detail/cxx14.h
new file mode 100644
index 000000000..8b78fb985
--- /dev/null
+++ b/runtime/libs/ndarray/src/detail/cxx14.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NDARRAY_CXX14_H_
+#define _NDARRAY_CXX14_H_
+
+namespace ndarray
+{
+
+namespace cxx14
+{
+
+template <size_t... Nums> struct index_sequence
+{
+ using value_type = size_t;
+
+ static constexpr std::size_t size() noexcept { return sizeof...(Nums); }
+};
+
+namespace detail
+{
+
+template <size_t v, typename Seq> struct _append;
+
+template <size_t v, size_t... Nums> struct _append<v, index_sequence<Nums...>>
+{
+ using result = index_sequence<Nums..., v>;
+};
+
+template <size_t Len> struct make_index_sequence
+{
+ using result =
+ typename detail::_append<Len - 1, typename make_index_sequence<Len - 1>::result>::result;
+};
+
+template <> struct make_index_sequence<1>
+{
+ using result = index_sequence<0>;
+};
+
+template <> struct make_index_sequence<0>
+{
+ using result = index_sequence<>;
+};
+
+} // namespace detail
+
+template <size_t Num> using make_index_sequence = typename detail::make_index_sequence<Num>::result;
+
+} // namespace cxx14
+
+} // namespace ndarray
+
+#endif //_NDARRAY_CXX14_H_
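make_index_sequence<N> above is built by recursively appending N-1 to make_index_sequence<N-1>, bottoming out at the empty sequence, so it yields the indices 0..N-1 in order. A small compile-time check of that expansion (illustrative only; <cstddef> is included first because the header uses size_t without declaring it, and src/ is on the public include path per the CMakeLists above):

```cpp
#include <cstddef> // cxx14.h uses size_t unqualified, so declare it first
#include <type_traits>

#include "detail/cxx14.h"

// make_index_sequence<3> should expand to exactly index_sequence<0, 1, 2>.
static_assert(std::is_same<ndarray::cxx14::make_index_sequence<3>,
                           ndarray::cxx14::index_sequence<0, 1, 2>>::value,
              "make_index_sequence builds 0..N-1 in order");

int main() { return 0; }
```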
diff --git a/runtime/libs/ndarray/test/CMakeLists.txt b/runtime/libs/ndarray/test/CMakeLists.txt
new file mode 100644
index 000000000..be1ed6510
--- /dev/null
+++ b/runtime/libs/ndarray/test/CMakeLists.txt
@@ -0,0 +1,18 @@
+if(NOT TARGET ndarray)
+ return()
+endif()
+
+add_executable(ndarray_test ndarray_test.cpp)
+
+target_link_libraries(ndarray_test PRIVATE ndarray)
+
+nnfw_find_package(GTest)
+if(NOT GTest_FOUND)
+ message(STATUS "GTest not available. Skipping NDArray test build")
+ return()
+endif(NOT GTest_FOUND)
+
+target_link_libraries(ndarray_test PUBLIC gtest gtest_main ${LIB_PTHREAD})
+
+add_test(ndarray_test ndarray_test)
+install(TARGETS ndarray_test DESTINATION unittest_standalone)
diff --git a/runtime/libs/ndarray/test/ndarray_test.cpp b/runtime/libs/ndarray/test/ndarray_test.cpp
new file mode 100644
index 000000000..4b5ad5765
--- /dev/null
+++ b/runtime/libs/ndarray/test/ndarray_test.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+
+#include "ndarray/Array.h"
+
+#include <utility>
+#include <vector>
+
+using namespace ndarray;
+
+TEST(NDArray_tests, basic_data_test)
+{
+
+ float raw_data[] = {1, 2, 3, 4};
+
+ Array<float> data22{raw_data, {2, 2}};
+
+ ASSERT_FLOAT_EQ(data22.at(0, 0), 1);
+ ASSERT_FLOAT_EQ(data22.at(0, 1), 2);
+ ASSERT_FLOAT_EQ(data22.at(1, 0), 3);
+ ASSERT_FLOAT_EQ(data22.at(1, 1), 4);
+ ASSERT_EQ(data22.shape().rank(), 2);
+ ASSERT_EQ(data22.shape().dim(0), 2);
+ ASSERT_EQ(data22.shape().dim(1), 2);
+
+ Array<float> data14{raw_data, {1, 4}};
+ ASSERT_FLOAT_EQ(data14.at(0, 0), 1);
+ ASSERT_FLOAT_EQ(data14.at(0, 1), 2);
+ ASSERT_FLOAT_EQ(data14.at(0, 2), 3);
+ ASSERT_FLOAT_EQ(data14.at(0, 3), 4);
+ ASSERT_EQ(data14.shape().rank(), 2);
+ ASSERT_EQ(data14.shape().dim(0), 1);
+ ASSERT_EQ(data14.shape().dim(1), 4);
+
+ ContiguousSpan<float> cs = data22.flat();
+ ASSERT_EQ(cs.size(), 4);
+ ASSERT_FLOAT_EQ(cs.at(3), 4);
+
+ Array<float> lv = std::move(data14);
+ ASSERT_FLOAT_EQ(lv.at(0, 0), 1);
+ ASSERT_FLOAT_EQ(lv.at(0, 1), 2);
+ ASSERT_FLOAT_EQ(lv.at(0, 2), 3);
+ ASSERT_FLOAT_EQ(lv.at(0, 3), 4);
+}
+
+TEST(NDArray_tests, slice_write_test)
+{
+ float raw_data[4] = {0};
+
+ Array<float> data22{raw_data, {2, 2}};
+
+ data22.slice(1) = {1, 2};
+
+ ASSERT_FLOAT_EQ(data22.at(0, 0), 0);
+ ASSERT_FLOAT_EQ(data22.at(0, 1), 0);
+ ASSERT_FLOAT_EQ(data22.at(1, 0), 1);
+ ASSERT_FLOAT_EQ(data22.at(1, 1), 2);
+}
+
+TEST(NDArray_tests, slice_read_test)
+{
+ float raw_data[4] = {1, 2, 3, 4};
+
+ Array<float> data22{raw_data, {2, 2}};
+
+ auto slice = data22.slice(1);
+
+ ASSERT_FLOAT_EQ(slice[0], 3);
+ ASSERT_FLOAT_EQ(slice[1], 4);
+}
+
+TEST(NDArray_tests, multidim_test)
+{
+ float raw_data[5] = {0, 1, 2, 3, 4};
+
+ Array<float> data22{raw_data, {1, 1, 1, 1, 5}};
+
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 0), 0);
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 1), 1);
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 2), 2);
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 3), 3);
+ ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 4), 4);
+}
+
+TEST(NDArray_tests, slice_assign_test)
+{
+ std::vector<float> v1{1, 2, 3, 4, 5};
+ std::vector<float> v2(5);
+
+ ContiguousSpan<float> span1(v1.begin(), v1.end());
+ ContiguousSpan<float> span2(v2.begin(), v2.end());
+
+ span2.assign(span1);
+
+ ASSERT_EQ(v1, v2);
+ ASSERT_EQ(span1.size(), 5);
+ ASSERT_EQ(span2.size(), 5);
+
+ ASSERT_EQ(span2.at(2), 3);
+ ASSERT_EQ(span2.at(4), 5);
+
+  ASSERT_EQ(*(span1.data() + 2), *(span2.data() + 2));
+
+ ContiguousSpan<float> span3(span2.offset(1));
+ ASSERT_EQ(span3.size(), 4);
+ ASSERT_EQ(span3.at(0), 2);
+ ASSERT_EQ(span3.at(1), 3);
+ ASSERT_EQ(span3.at(2), 4);
+ ASSERT_EQ(span3.at(3), 5);
+}
diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h
index 6624ae676..4fce291a0 100644
--- a/runtime/onert/api/include/nnfw_version.h
+++ b/runtime/onert/api/include/nnfw_version.h
@@ -21,6 +21,6 @@
* NNFW_VERSION is a uint32 value representing nnfw runtime version
* in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
*/
-#define NNFW_VERSION 0x01001100
+#define NNFW_VERSION 0x01001200
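+// Per the scheme above: 0x01001200 -> major 0x01, minor 0x0012, patch 0x00, i.e. version 1.18.0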
#endif // __NNFW_VERSION_H__
diff --git a/runtime/onert/backend/cpu/CMakeLists.txt b/runtime/onert/backend/cpu/CMakeLists.txt
index 1e5443263..b61e58251 100644
--- a/runtime/onert/backend/cpu/CMakeLists.txt
+++ b/runtime/onert/backend/cpu/CMakeLists.txt
@@ -12,6 +12,7 @@ target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_common)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_coverage)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE ruy)
target_link_libraries(${LIB_ONERT_BACKEND_CPU} INTERFACE ruy_instrumentation)
+target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE ndarray)
set_target_properties(${LIB_ONERT_BACKEND_CPU} PROPERTIES OUTPUT_NAME backend_cpu)
diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc
index 59fb68d55..75274dc88 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.cc
+++ b/runtime/onert/backend/cpu/KernelGenerator.cc
@@ -35,6 +35,7 @@
#include "ops/GatherLayer.h"
#include "ops/LSTMLayer.h"
#include "ops/MeanLayer.h"
+#include "ops/DetectionPostProcessLayer.h"
#include "ops/OneHotLayer.h"
#include "ops/OperationUtils.h"
#include "ops/PackLayer.h"
@@ -1177,6 +1178,51 @@ void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
_return_fn = std::move(fn);
}
+void KernelGenerator::visit(const ir::operation::DetectionPostProcess &node)
+{
+ using NMS = ir::operation::DetectionPostProcess;
+
+ ops::DetectionPostProcessLayer::DetectionPostProcessParameters parameters;
+ parameters.scales.y = node.param().scale.y_scale;
+ parameters.scales.x = node.param().scale.x_scale;
+ parameters.scales.w = node.param().scale.w_scale;
+ parameters.scales.h = node.param().scale.h_scale;
+
+ parameters.iou_threshold = node.param().iou_threshold;
+ parameters.score_threshold = node.param().score_threshold;
+ parameters.max_boxes_per_class = node.param().max_boxes_per_class;
+ parameters.max_detections = node.param().max_detections;
+ parameters.num_classes = node.param().num_classes;
+ parameters.center_box_format = node.param().center_size_boxes;
+ parameters.max_classes_per_detection = node.param().max_classes_per_detection;
+
+ auto boxes_index = node.getInputs().at(NMS::Input::BOXES);
+ auto scores_index = node.getInputs().at(NMS::Input::SCORES);
+ auto anchors_index = node.getInputs().at(NMS::Input::INPUT_ANCHORS);
+
+ auto o_classes_index = node.getOutputs().at(NMS::Output::BOX_CLASSES);
+ auto o_coords_index = node.getOutputs().at(NMS::Output::BOX_COORDS);
+ auto o_scores_index = node.getOutputs().at(NMS::Output::BOX_SCORES);
+ auto o_num_selected_index = node.getOutputs().at(NMS::Output::NUM_SELECTED);
+
+ parameters.boxes_descr = _ctx.at(boxes_index).shape().dims();
+  parameters.scores_descr = _ctx.at(scores_index).shape().dims();
+
+ parameters.boxes_input = _tensor_reg->getPortableTensor(boxes_index);
+ parameters.scores_input = _tensor_reg->getPortableTensor(scores_index);
+ parameters.anchors_input = _tensor_reg->getPortableTensor(anchors_index);
+
+ parameters.box_classes_output = _tensor_reg->getPortableTensor(o_classes_index);
+ parameters.box_coords_output = _tensor_reg->getPortableTensor(o_coords_index);
+ parameters.box_scores_output = _tensor_reg->getPortableTensor(o_scores_index);
+ parameters.num_selections_output = _tensor_reg->getPortableTensor(o_num_selected_index);
+
+ auto fn = std::make_unique<ops::DetectionPostProcessLayer>();
+ fn->configure(std::move(parameters));
+
+ _return_fn = std::move(fn);
+}
+
void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
{
const auto output_index{node.getOutputs().at(0)};
diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h
index d452d0ba6..d7d5fe6fc 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.h
+++ b/runtime/onert/backend/cpu/KernelGenerator.h
@@ -69,6 +69,7 @@ public:
void visit(const ir::operation::LogSoftmax &) override;
void visit(const ir::operation::LSTM &) override;
void visit(const ir::operation::MatrixBandPart &) override;
+ void visit(const ir::operation::DetectionPostProcess &) override;
void visit(const ir::operation::OneHot &) override;
void visit(const ir::operation::Pack &) override;
void visit(const ir::operation::Pad &) override;
diff --git a/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc
new file mode 100644
index 000000000..8a6fe6504
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DetectionPostProcessLayer.h"
+
+#include "ndarray/Array.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <numeric>
+#include <utility>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+
+using namespace ndarray;
+
+using CenterSizeBox = DetectionPostProcessLayer::CenterSizeBox;
+using CornerBox = DetectionPostProcessLayer::CornerBox;
+
+using NonMaxSuppressionParam = DetectionPostProcessLayer::DetectionPostProcessParameters;
+using Allocations = DetectionPostProcessLayer::Allocations;
+
+struct OutputArrays
+{
+ OutputArrays(CornerBox *coords_buf, float *scores_buf, float *classes_buf,
+ int *num_selections_buf, size_t max_detections)
+ : coords(coords_buf, {max_detections}), scores(scores_buf, {max_detections}),
+ classes(classes_buf, {max_detections}), num_selections(num_selections_buf, {1})
+ {
+ }
+
+ Array<CornerBox> coords;
+ Array<float> scores;
+ Array<float> classes;
+ Array<int> num_selections;
+};
+
+struct TemporaryArrays
+{
+ TemporaryArrays(int *selections_buffer, int max_detections)
+ : selections(selections_buffer, {static_cast<unsigned long>(max_detections)})
+ {
+ }
+
+ Array<int> selections;
+};
+
+// Sort indices so that the first `k` entries refer to the highest scores, in decreasing order
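+// (e.g. scores = {0.1, 0.9, 0.5} with k == scores.size() leaves indices as {1, 2, 0})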
+void PartialArgSort(const ContiguousSpan<float, true> &scores,
+ const ContiguousSpan<int, false> &indices, int k)
+{
+ std::iota(indices.begin(), indices.begin() + k, 0);
+ std::partial_sort(indices.begin(), indices.begin() + k, indices.begin() + scores.size(),
+ [&scores](const int i, const int j) { return scores[i] > scores[j]; });
+}
+
+template <typename T> ContiguousSpan<T, false> static vecToSpan(std::vector<T> &v)
+{
+ return ContiguousSpan<T, false>{v.begin(), v.end()};
+}
+
+Array<const CornerBox> decodeBoxes(const Array<float> &raw_boxes, const Array<float> &raw_anchors,
+ bool center_box_format, const CenterSizeBox &scales)
+{
+ auto nbatches = raw_boxes.shape().dim(0);
+ auto num_boxes = raw_boxes.shape().dim(1);
+
+ auto anchors = array_cast<const CenterSizeBox>(raw_anchors, {num_boxes});
+
+ if (!center_box_format)
+ {
+ auto boxes_p = reinterpret_cast<const CornerBox *>(raw_boxes.flat().data());
+ return {boxes_p, {num_boxes}};
+ }
+ else
+ {
+ // TODO support box center-width encoding correctly
+ // i.e anchors
+ auto boxes_p = reinterpret_cast<const CenterSizeBox *>(raw_boxes.flat().data());
+ Array<const CenterSizeBox> in_boxes{boxes_p, {num_boxes}};
+
+ auto decoded_boxes_p = new CornerBox[nbatches * num_boxes];
+ Array<CornerBox> decoded_boxes_a{decoded_boxes_p, {num_boxes}};
+
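+    // Decode each (y, x, h, w) center-size offset relative to its anchor into absolute
+    // corner coordinates (the usual SSD-style box decoding).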
+ for (size_t i = 0; i < num_boxes; ++i)
+ {
+ auto anchor = anchors.at(i);
+ auto &box = decoded_boxes_a.at(i);
+ float yc = in_boxes.at(i).y / scales.y * anchor.h + anchor.y;
+ float xc = in_boxes.at(i).x / scales.x * anchor.w + anchor.x;
+ float halfh = 0.5f * std::exp(in_boxes.at(i).h / scales.h) * anchor.h;
+ float halfw = 0.5f * std::exp(in_boxes.at(i).w / scales.w) * anchor.w;
+ box.x1 = xc - halfw;
+ box.x2 = xc + halfw;
+ box.y1 = yc - halfh;
+ box.y2 = yc + halfh;
+
+ assert(box.x2 > box.x1);
+ assert(box.y2 > box.y1);
+ }
+
+ return array_cast<const CornerBox>(std::move(decoded_boxes_a), decoded_boxes_a.shape());
+ }
+}
+
+float computeIOU(const CornerBox &box1, const CornerBox &box2)
+{
+ float area_i = (box1.y2 - box1.y1) * (box1.x2 - box1.x1);
+ float area_j = (box2.y2 - box2.y1) * (box2.x2 - box2.x1);
+ if (area_i <= 0 || area_j <= 0)
+ {
+ return 0.0;
+ }
+ float in_ymin = std::max<float>(box1.y1, box2.y1);
+ float in_xmin = std::max<float>(box1.x1, box2.x1);
+ float in_ymax = std::min<float>(box1.y2, box2.y2);
+ float in_xmax = std::min<float>(box1.x2, box2.x2);
+ float in_area = std::max<float>(in_ymax - in_ymin, 0.0) * std::max<float>(in_xmax - in_xmin, 0.0);
+
+ return in_area / (area_i + area_j - in_area);
+}
+
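+// Greedy single-class non-maximum suppression: visit boxes in decreasing score order and
+// suppress any remaining box whose IOU with an already kept box exceeds param.iou_threshold.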
+int doSingleClass(const Array<const CornerBox> &boxes, const std::vector<float> &scores,
+ const NonMaxSuppressionParam &param, TemporaryArrays &temps,
+ size_t max_detections)
+{
+ auto num_boxes = boxes.shape().dim(0);
+
+ std::vector<int> sorted_box_indices(num_boxes);
+ PartialArgSort(ContiguousSpan<float, true>(scores.data(), num_boxes),
+ vecToSpan(sorted_box_indices), num_boxes);
+
+ // TODO move to temp allocations
+ std::vector<int> process_box(num_boxes, 1);
+
+ size_t selected_count = 0;
+ for (size_t i = 0; i < num_boxes; ++i)
+ {
+ auto box_index = sorted_box_indices[i];
+
+ if (!process_box[box_index] || scores[box_index] < param.score_threshold)
+ {
+ continue;
+ }
+
+ temps.selections.at(selected_count) = box_index;
+ selected_count++;
+
+ if (selected_count >= max_detections)
+ {
+ break;
+ }
+
+ for (size_t j = i + 1; j < num_boxes; ++j)
+ {
+ if (!process_box[sorted_box_indices[j]])
+ {
+ continue;
+ }
+
+ float IOU = computeIOU(boxes.at(box_index), boxes.at(sorted_box_indices[j]));
+ if (IOU > param.iou_threshold)
+ {
+ process_box[sorted_box_indices[j]] = 0;
+ }
+ }
+ }
+
+ return selected_count;
+}
+
+void collectBoxes(TemporaryArrays &temporary, const Array<const CornerBox> &decoded_boxes,
+ std::vector<float> &scores, int num_selected, OutputArrays &output,
+ const Array<int> &sorted_classes, int detections_per_box)
+{
+ auto &selections = temporary.selections;
+
+ size_t output_box_count = 0;
+
+ for (int i = 0; i < num_selected; ++i)
+ {
+ int selected_box = selections.at(output_box_count);
+
+ for (int c = 0; c < detections_per_box; ++c)
+ {
+ output.classes.at(output_box_count) = sorted_classes.at(selected_box, c);
+ output.scores.at(output_box_count) = scores[selected_box];
+ output.coords.at(output_box_count) = decoded_boxes.at(selected_box);
+ output_box_count++;
+ }
+ }
+}
+
+void DetectionPostProcess(const Array<float> &boxes_a, const Array<float> &scores_a,
+ Array<float> &num_selected_a, const NonMaxSuppressionParam &param,
+ const Allocations &allocations, OutputArrays &outputs)
+{
+ TemporaryArrays temporary(allocations.selections_buffer, param.max_detections);
+
+  // Only a batch size of 1 is supported at the moment
+ auto num_boxes = boxes_a.shape().dim(1);
+ size_t num_classes = param.num_classes;
+ size_t num_classes_with_background = scores_a.shape().dim(2);
+ bool have_background = num_classes_with_background != num_classes;
+
+ size_t max_classes_per_box = std::min<size_t>(num_classes, param.max_classes_per_detection);
+
+ // TODO move this to allocations
+ std::vector<int> sorted_class_indices(num_boxes * num_classes);
+
+ Array<int> class_indices(sorted_class_indices.data(), {num_boxes, num_classes});
+
+ // TODO move to allocations
+ std::vector<float> max_scores(num_boxes);
+
+ for (size_t row = 0; row < num_boxes; row++)
+ {
+ auto box_scores = scores_a.slice(0, row).offset(have_background ? 1 : 0);
+ auto indices = class_indices.slice(row);
+
+ PartialArgSort(box_scores, indices, num_classes);
+
+ max_scores[row] = box_scores[indices[0]];
+ }
+
+ auto anchors_a =
+ Array<float>(reinterpret_cast<float *>(param.anchors_input->buffer()), {num_boxes, 4});
+ auto decoded_boxes = decodeBoxes(boxes_a, anchors_a, param.center_box_format, param.scales);
+
+ int num_selected =
+ doSingleClass(decoded_boxes, max_scores, param, temporary, param.max_detections);
+
+ collectBoxes(temporary, decoded_boxes, max_scores, num_selected, outputs, class_indices,
+ max_classes_per_box);
+
+ num_selected_a.at(0) = num_selected;
+}
+} // namespace
+
+template <typename T> Array<T> toArray(uint8_t *ptr, std::vector<int32_t> &descr)
+{
+ ndarray::Shape shape(descr.size());
+ for (size_t i = 0; i < descr.size(); ++i)
+ {
+ shape.dim(i) = descr[i];
+ }
+
+ return Array<T>{reinterpret_cast<T *>(ptr), shape};
+}
+
+void DetectionPostProcessLayer::configure(DetectionPostProcessParameters parameters)
+{
+ _parameters = std::move(parameters);
+ _allocations.selections_buffer = new int[_parameters.max_detections * 2];
+}
+
+void DetectionPostProcessLayer::run()
+{
+ auto nbatches = (unsigned int)_parameters.boxes_descr[0];
+  // No support for batch sizes other than 1 (this is fine, since TFLite does not support
+  // batching for the post-process either)
+ assert(nbatches == 1);
+
+ auto boxes_a = toArray<float>(_parameters.boxes_input->buffer(), _parameters.boxes_descr);
+  auto scores_a = toArray<float>(_parameters.scores_input->buffer(), _parameters.scores_descr);
+
+ auto num_selected_a = ndarray::Array<float>(
+ reinterpret_cast<float *>(_parameters.num_selections_output->buffer()), {nbatches});
+
+ OutputArrays outputArrays(reinterpret_cast<CornerBox *>(_parameters.box_coords_output->buffer()),
+ reinterpret_cast<float *>(_parameters.box_scores_output->buffer()),
+ reinterpret_cast<float *>(_parameters.box_classes_output->buffer()),
+ reinterpret_cast<int *>(_parameters.num_selections_output->buffer()),
+ _parameters.max_detections);
+
+ DetectionPostProcess(boxes_a, scores_a, num_selected_a, _parameters, _allocations, outputArrays);
+}
+
+DetectionPostProcessLayer::~DetectionPostProcessLayer() { delete[] _allocations.selections_buffer; }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.h b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.h
new file mode 100644
index 000000000..836a70cac
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_DPP_H__
+#define __ONERT_BACKEND_CPU_OPS_DPP_H__
+
+#include <exec/IFunction.h>
+
+#include "OperationUtils.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+class DetectionPostProcessLayer : public ::onert::exec::IFunction
+{
+public:
+ struct CornerBox
+ {
+ float y1, x1;
+ float y2, x2;
+ };
+
+ struct CenterSizeBox
+ {
+ float y, x;
+ float h, w;
+ };
+
+ struct DetectionPostProcessParameters
+ {
+ const IPortableTensor *boxes_input;
+ const IPortableTensor *scores_input;
+ const IPortableTensor *anchors_input;
+ IPortableTensor *box_coords_output;
+ IPortableTensor *box_classes_output;
+ IPortableTensor *box_scores_output;
+ IPortableTensor *num_selections_output;
+ std::vector<int32_t> boxes_descr;
+    std::vector<int32_t> scores_descr;
+
+ uint32_t max_detections;
+ float score_threshold;
+ float iou_threshold; // intersection-over-union
+ uint32_t max_boxes_per_class;
+ bool center_box_format = false;
+ int32_t num_classes;
+ int32_t max_classes_per_detection;
+ CenterSizeBox scales;
+ };
+
+ enum SelectionFormat
+ {
+ BOX_INDEX = 1,
+ CLASS_INDEX = 0
+ };
+
+ struct Allocations
+ {
+ int *selections_buffer = nullptr;
+ // TODO move all dynamic allocations here, and into configure phase
+ };
+
+ DetectionPostProcessLayer() : _parameters{}
+ {
+ // DO NOTHING
+ }
+
+ virtual ~DetectionPostProcessLayer();
+
+public:
+ void configure(DetectionPostProcessParameters parameters);
+
+ void run() override;
+
+private:
+ DetectionPostProcessParameters _parameters;
+
+ Allocations _allocations;
+};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_DPP_H__
diff --git a/runtime/onert/core/include/compiler/StaticShapeInferer.h b/runtime/onert/core/include/compiler/StaticShapeInferer.h
index 2e484e649..b2272e262 100644
--- a/runtime/onert/core/include/compiler/StaticShapeInferer.h
+++ b/runtime/onert/core/include/compiler/StaticShapeInferer.h
@@ -112,6 +112,7 @@ private:
void visit(const ir::operation::Transpose &op) override;
void visit(const ir::operation::Unpack &op) override;
void visit(const ir::operation::While &op) override;
+ void visit(const ir::operation::DetectionPostProcess &op) override;
private:
/**
diff --git a/runtime/onert/core/include/exec/DynamicShapeInferer.h b/runtime/onert/core/include/exec/DynamicShapeInferer.h
index 3d040e2cc..f814b789a 100644
--- a/runtime/onert/core/include/exec/DynamicShapeInferer.h
+++ b/runtime/onert/core/include/exec/DynamicShapeInferer.h
@@ -67,6 +67,7 @@ public:
void visit(const ir::operation::L2Normalization &op) override;
void visit(const ir::operation::LSTM &op) override;
void visit(const ir::operation::MatrixBandPart &op) override;
+ void visit(const ir::operation::DetectionPostProcess &op) override;
void visit(const ir::operation::OneHot &op) override;
void visit(const ir::operation::Pack &op) override;
void visit(const ir::operation::Pad &op) override;
diff --git a/runtime/onert/core/include/ir/Operations.Include.h b/runtime/onert/core/include/ir/Operations.Include.h
index 45fadc474..0eb45e1ee 100644
--- a/runtime/onert/core/include/ir/Operations.Include.h
+++ b/runtime/onert/core/include/ir/Operations.Include.h
@@ -50,6 +50,7 @@
#include "ir/operation/LogSoftmax.h"
#include "ir/operation/LSTM.h"
#include "ir/operation/MatrixBandPart.h"
+#include "ir/operation/DetectionPostProcess.h"
#include "ir/operation/OneHot.h"
#include "ir/operation/Pack.h"
#include "ir/operation/Pad.h"
diff --git a/runtime/onert/core/include/ir/Operations.lst b/runtime/onert/core/include/ir/Operations.lst
index 7f3c40b4b..f17fdfdd7 100644
--- a/runtime/onert/core/include/ir/Operations.lst
+++ b/runtime/onert/core/include/ir/Operations.lst
@@ -53,6 +53,7 @@ OP(LocalResponseNormalization)
OP(LogSoftmax)
OP(LSTM)
OP(MatrixBandPart)
+OP(DetectionPostProcess)
OP(OneHot)
OP(Pack)
OP(Pad)
diff --git a/runtime/onert/core/include/ir/operation/DetectionPostProcess.h b/runtime/onert/core/include/ir/operation/DetectionPostProcess.h
new file mode 100644
index 000000000..becb0e21a
--- /dev/null
+++ b/runtime/onert/core/include/ir/operation/DetectionPostProcess.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_MODEL_OPERATION_DETECTION_POST_PROCESS_NODE_H__
+#define __NEURUN_MODEL_OPERATION_DETECTION_POST_PROCESS_NODE_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class DetectionPostProcess : public Operation
+{
+public:
+ enum Input
+ {
+ BOXES = 0,
+ SCORES = 1,
+ INPUT_ANCHORS = 2
+ };
+
+ enum Output
+ {
+ BOX_COORDS = 0,
+ BOX_CLASSES = 1,
+ BOX_SCORES = 2,
+ NUM_SELECTED = 3
+ };
+
+ struct Scale
+ {
+ float y_scale;
+ float x_scale;
+ float h_scale;
+ float w_scale;
+ };
+
+ struct Param
+ {
+ int max_detections;
+ float score_threshold;
+ float iou_threshold; // intersection-over-union
+ int max_boxes_per_class;
+ int32_t num_classes;
+ int32_t max_classes_per_detection;
+    // N*N complexity instead of N*N*M, where N is the number of boxes and M is the number of classes
+ bool center_size_boxes;
+ bool do_fast_eval = true;
+ Scale scale;
+ };
+
+public:
+ DetectionPostProcess(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+ const Param &param);
+
+public:
+ void accept(OperationVisitor &v) const override;
+
+ std::string getName() const { return "DetectionPostProcess"; }
+
+public:
+ const Param &param() const { return _param; }
+ OpCode opcode() const final { return OpCode::DetectionPostProcess; }
+
+private:
+ Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __NEURUN_MODEL_OPERATION_DETECTION_POST_PROCESS_NODE_H__
diff --git a/runtime/onert/core/src/compiler/StaticShapeInferer.cc b/runtime/onert/core/src/compiler/StaticShapeInferer.cc
index 5849a9801..f2fee2c3c 100644
--- a/runtime/onert/core/src/compiler/StaticShapeInferer.cc
+++ b/runtime/onert/core/src/compiler/StaticShapeInferer.cc
@@ -1302,6 +1302,30 @@ void StaticShapeInferer::visit(const ir::operation::While &op)
}
}
+void StaticShapeInferer::visit(const ir::operation::DetectionPostProcess &op)
+{
+  // TODO NMS currently supports only a very limited set of input/output sizes.
+ ir::operation::DetectionPostProcess::Param param = op.param();
+
+ const int num_detected_boxes = param.max_detections * param.max_classes_per_detection;
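+  // The four outputs are, in order: box coords {1, N, 4}, box classes {1, N},
+  // box scores {1, N} and the number of selected boxes {1}, where N = num_detected_boxes.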
+
+ const auto output_idx1 = op.getOutputs().at(0);
+ auto &output1 = _operands.at(output_idx1);
+ output1.info().shape({1, num_detected_boxes, 4});
+
+ const auto output_idx2 = op.getOutputs().at(1);
+ auto &output2 = _operands.at(output_idx2);
+ output2.info().shape({1, num_detected_boxes});
+
+ const auto output_idx3 = op.getOutputs().at(2);
+ auto &output3 = _operands.at(output_idx3);
+ output3.info().shape({1, num_detected_boxes});
+
+ const auto output_idx4 = op.getOutputs().at(3);
+ auto &output4 = _operands.at(output_idx4);
+ output4.info().shape({1});
+}
+
} // namespace compiler
} // namespace onert
diff --git a/runtime/onert/core/src/exec/DynamicShapeInferer.cc b/runtime/onert/core/src/exec/DynamicShapeInferer.cc
index dbf4eb28f..fb8058d23 100644
--- a/runtime/onert/core/src/exec/DynamicShapeInferer.cc
+++ b/runtime/onert/core/src/exec/DynamicShapeInferer.cc
@@ -601,6 +601,14 @@ void DynamicShapeInferer::visit(const ir::operation::MatrixBandPart &op)
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::INPUT));
}
+void DynamicShapeInferer::visit(const ir::operation::DetectionPostProcess & /* op */)
+{
+  // NOTE DetectionPostProcess's output shapes are decided at compile time
+  //      by the static shape inferer.
+  // The output shapes are independent of the input shapes
+  //      and are determined solely by the parameter values.
+}
+
void DynamicShapeInferer::visit(const ir::operation::OneHot &op)
{
auto output_ind = op.getOutputs().at(0);
diff --git a/runtime/onert/core/src/ir/OperationValidator.cc b/runtime/onert/core/src/ir/OperationValidator.cc
index 705a37e2c..094dbc0d5 100644
--- a/runtime/onert/core/src/ir/OperationValidator.cc
+++ b/runtime/onert/core/src/ir/OperationValidator.cc
@@ -211,6 +211,14 @@ void OperationValidator::visit(const operation::DepthToSpace &node)
OP_REQUIRES(block_size > 0);
}
+void OperationValidator::visit(const operation::DetectionPostProcess &node)
+{
+ auto param = node.param();
+
+ // FIXME: number of classes should be 1 for now.
+ OP_REQUIRES(param.num_classes == 1);
+}
+
void OperationValidator::visit(const operation::DepthwiseConv2D &node)
{
const auto input_index{node.getInputs().at(operation::DepthwiseConv2D::Input::INPUT)};
diff --git a/runtime/onert/core/src/ir/OperationValidator.h b/runtime/onert/core/src/ir/OperationValidator.h
index 9829ca095..b9bcc4ee8 100644
--- a/runtime/onert/core/src/ir/OperationValidator.h
+++ b/runtime/onert/core/src/ir/OperationValidator.h
@@ -55,6 +55,7 @@ public:
void visit(const operation::Conv2D &node) override;
void visit(const operation::DepthToSpace &node) override;
void visit(const operation::DepthwiseConv2D &node) override;
+ void visit(const operation::DetectionPostProcess &node) override;
void visit(const operation::ElementwiseActivation &node) override;
void visit(const operation::ElementwiseBinary &node) override;
void visit(const operation::ElementwiseUnary &node) override;
diff --git a/runtime/onert/core/src/ir/operation/DetectionPostProcess.cc b/runtime/onert/core/src/ir/operation/DetectionPostProcess.cc
new file mode 100644
index 000000000..cd708796d
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/DetectionPostProcess.cc
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/DetectionPostProcess.h"
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+DetectionPostProcess::DetectionPostProcess(const OperandIndexSequence &inputs,
+ const OperandIndexSequence &outputs, const Param &param)
+ : Operation(OperandConstraint::createExact(3u), inputs, outputs), _param(param)
+{
+}
+
+void DetectionPostProcess::accept(OperationVisitor &v) const { v.visit(*this); }
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/frontend/base_loader/include/base_loader.h b/runtime/onert/frontend/base_loader/include/base_loader.h
index c444e7365..6ba7ee922 100644
--- a/runtime/onert/frontend/base_loader/include/base_loader.h
+++ b/runtime/onert/frontend/base_loader/include/base_loader.h
@@ -142,6 +142,7 @@ private:
void loadIf(const Operator *op, ir::Graph &subg);
void loadLeakyRelu(const Operator *op, ir::Graph &subg);
void loadLogSoftmax(const Operator *op, ir::Graph &subg);
+ void loadDetectionPostProcess(const Operator *op, ir::Graph &subg);
void loadOneHot(const Operator *op, ir::Graph &subg);
void loadPack(const Operator *op, ir::Graph &subg);
void loadPool2D(const Operator *op, ir::Graph &subg, ir::operation::Pool2D::PoolType op_type);
@@ -928,6 +929,45 @@ void BaseLoader<LoaderDomain>::loadGather(const Operator *op, ir::Graph &subg)
}
template <typename LoaderDomain>
+void BaseLoader<LoaderDomain>::loadDetectionPostProcess(const Operator *op, ir::Graph &subg)
+{
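+  // TFLite_Detection_PostProcess is a custom op: its attributes arrive as a FlexBuffer map
+  // in custom_options rather than as builtin options, so they are parsed here by key.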
+ const flexbuffers::Map &m =
+ flexbuffers::GetRoot(op->custom_options()->data(), op->custom_options()->size()).AsMap();
+
+ ir::operation::DetectionPostProcess::Param param;
+
+ param.max_detections = m["max_detections"].AsInt32();
+
+ // TODO fixme
+ param.max_classes_per_detection = m["max_classes_per_detection"].AsInt32();
+ if (m["detections_per_class"].IsNull())
+ param.max_boxes_per_class = 100;
+ else
+ param.max_boxes_per_class = m["detections_per_class"].AsInt32();
+
+ if (m["use_regular_nms"].IsNull())
+ param.do_fast_eval = true;
+ else
+ param.do_fast_eval = !m["use_regular_nms"].AsBool();
+
+ param.score_threshold = m["nms_score_threshold"].AsFloat();
+ param.iou_threshold = m["nms_iou_threshold"].AsFloat();
+
+ // TODO add num classes support
+ param.num_classes = m["num_classes"].AsInt32();
+
+ param.scale.y_scale = m["y_scale"].AsFloat();
+ param.scale.x_scale = m["x_scale"].AsFloat();
+ param.scale.h_scale = m["h_scale"].AsFloat();
+ param.scale.w_scale = m["w_scale"].AsFloat();
+
+ // TODO depends on input model framework
+ param.center_size_boxes = true;
+
+ loadOperationTo<ir::operation::DetectionPostProcess>(op, subg, param);
+}
+
+template <typename LoaderDomain>
void BaseLoader<LoaderDomain>::loadBatchMatMul(const Operator *op, ir::Graph &subg)
{
ir::operation::BatchMatMul::Param param;
@@ -997,7 +1037,8 @@ void BaseLoader<LoaderDomain>::loadCustom(const Operator *op, ir::Graph &subg)
BroadcastTo,
FusedBatchNorm,
StatelessRandomUniform,
- Erf
+ Erf,
+ DetectionPostProcess
};
// Mapping from custom op name string to BuiltinOP enum
@@ -1011,6 +1052,7 @@ void BaseLoader<LoaderDomain>::loadCustom(const Operator *op, ir::Graph &subg)
{"BroadcastTo", BuiltinOP::BroadcastTo},
{"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform},
{"Erf", BuiltinOP::Erf},
+ {"TFLite_Detection_PostProcess", BuiltinOP::DetectionPostProcess},
};
try
@@ -1046,6 +1088,9 @@ void BaseLoader<LoaderDomain>::loadCustom(const Operator *op, ir::Graph &subg)
case BuiltinOP::Erf:
loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ERF);
break;
+ case BuiltinOP::DetectionPostProcess:
+ loadDetectionPostProcess(op, subg);
+ break;
default:
throw std::runtime_error{
"Loader: Custom OP map is defined but operation loader function is not defined"};
diff --git a/tests/nnfw_api/src/CircleGen.cc b/tests/nnfw_api/src/CircleGen.cc
index 579d68c86..0ffc8fb44 100644
--- a/tests/nnfw_api/src/CircleGen.cc
+++ b/tests/nnfw_api/src/CircleGen.cc
@@ -15,6 +15,7 @@
*/
#include "CircleGen.h"
+#include "flatbuffers/flexbuffers.h"
CircleGen::CircleGen() : _subgraph_contexts(1) // Create primary subgraph
{
@@ -189,6 +190,35 @@ uint32_t CircleGen::addOperatorDepthwiseConv2D(const OperatorParams &params,
circle::BuiltinOptions_DepthwiseConv2DOptions, options);
}
+uint32_t CircleGen::addOperatorDetectionPostProcess(const OperatorParams &params, int num_classes,
+ float y_scale, float x_scale, float h_scale,
+ float w_scale, float nms_score_threshold,
+ float nms_iou_threshold, int max_detections,
+ int max_classes_per_detection,
+ int detections_per_class)
+{
+ // flexbuffer custom_option
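+  // The keys written below mirror the attribute names that loadDetectionPostProcess()
+  // parses back out of the FlexBuffer map when the model is loaded.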
+ auto flex_buffers = std::make_unique<flexbuffers::Builder>();
+ size_t map_start = flex_buffers->StartMap();
+ flex_buffers->Int("num_classes", num_classes);
+ flex_buffers->Float("y_scale", y_scale);
+ flex_buffers->Float("x_scale", x_scale);
+ flex_buffers->Float("h_scale", h_scale);
+ flex_buffers->Float("w_scale", w_scale);
+ flex_buffers->Float("nms_iou_threshold", nms_iou_threshold);
+ flex_buffers->Float("nms_score_threshold", nms_score_threshold);
+ flex_buffers->Int("max_detections", max_detections);
+ flex_buffers->Int("max_classes_per_detection", max_classes_per_detection);
+ flex_buffers->Int("detections_per_class", detections_per_class);
+ flex_buffers->EndMap(map_start);
+ flex_buffers->Finish();
+
+ return addCustomOperatorWithOptions(params, "TFLite_Detection_PostProcess",
+ circle::BuiltinOptions_NONE, 0, &flex_buffers->GetBuffer(),
+ circle::CustomOptionsFormat::CustomOptionsFormat_FLEXBUFFERS,
+ nullptr, nullptr);
+}
+
uint32_t CircleGen::addOperatorElu(const OperatorParams &params)
{
return addOperatorWithOptions(params, circle::BuiltinOperator_ELU, circle::BuiltinOptions_NONE,
@@ -523,6 +553,23 @@ uint32_t CircleGen::addOperatorWithOptions(const OperatorParams &params,
return ind;
}
+uint32_t CircleGen::addCustomOperatorWithOptions(
+ const OperatorParams &params, std::string custom_code, circle::BuiltinOptions options_type,
+ flatbuffers::Offset<void> options, const std::vector<uint8_t> *custom_options,
+ circle::CustomOptionsFormat custom_options_format,
+ const std::vector<uint8_t> *mutating_variable_inputs, const std::vector<int32_t> *intermediates)
+
+{
+ uint32_t opcode_ind = addCustomOperatorCode(custom_code);
+ auto op = circle::CreateOperatorDirect(
+ _fbb, opcode_ind, &params.inputs, &params.outputs, options_type, options, custom_options,
+ custom_options_format, mutating_variable_inputs, intermediates);
+
+ uint32_t ind = curSubgCtx().operators.size();
+ curSubgCtx().operators.emplace_back(op);
+ return ind;
+}
+
uint32_t CircleGen::addOperatorCode(circle::BuiltinOperator opcode)
{
// TODO If the same OperatorCode is registered already, just return it
@@ -531,6 +578,15 @@ uint32_t CircleGen::addOperatorCode(circle::BuiltinOperator opcode)
return ind;
}
+uint32_t CircleGen::addCustomOperatorCode(std::string custom_code)
+{
+ // TODO If the same OperatorCode is registered already, just return it
+ uint32_t ind = _opcodes.size();
+ _opcodes.emplace_back(
+ circle::CreateOperatorCodeDirect(_fbb, circle::BuiltinOperator_CUSTOM, custom_code.c_str()));
+ return ind;
+}
+
flatbuffers::Offset<circle::Buffer> CircleGen::buildBuffer(const uint8_t *buf, size_t size)
{
if (buf == nullptr && size == 0)
diff --git a/tests/nnfw_api/src/CircleGen.h b/tests/nnfw_api/src/CircleGen.h
index ab7707d5a..f6f799668 100644
--- a/tests/nnfw_api/src/CircleGen.h
+++ b/tests/nnfw_api/src/CircleGen.h
@@ -159,6 +159,11 @@ public:
int stride_w, int stride_h, int depth_multiplier,
circle::ActivationFunctionType actfn, int dilation_w = 1,
int dilation_h = 1);
+ uint32_t addOperatorDetectionPostProcess(const OperatorParams &params, int num_classes,
+ float y_scale, float x_scale, float h_scale,
+ float w_scale, float nms_score_threshold,
+ float nms_iou_threshold, int max_detections,
+ int max_classes_per_detection, int detections_per_class);
uint32_t addOperatorElu(const OperatorParams &params);
uint32_t addOperatorEqual(const OperatorParams &params);
uint32_t addOperatorExpandDims(const OperatorParams &params);
@@ -220,7 +225,15 @@ private:
uint32_t addOperatorWithOptions(const OperatorParams &params, circle::BuiltinOperator opcode,
circle::BuiltinOptions options_type,
flatbuffers::Offset<void> options);
+ uint32_t addCustomOperatorWithOptions(const OperatorParams &params, std::string custom_code,
+ circle::BuiltinOptions options_type,
+ flatbuffers::Offset<void> options,
+ const std::vector<uint8_t> *custom_options,
+ circle::CustomOptionsFormat custom_options_format,
+ const std::vector<uint8_t> *mutating_variable_inputs,
+ const std::vector<int32_t> *intermediates);
uint32_t addOperatorCode(circle::BuiltinOperator opcode);
+ uint32_t addCustomOperatorCode(std::string custom_code);
flatbuffers::Offset<circle::Buffer> buildBuffer(const uint8_t *buf, size_t size);
flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams &params);
flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams &params, float scale,
diff --git a/tests/nnfw_api/src/one_op_tests/ArgMinMax.cc b/tests/nnfw_api/src/one_op_tests/ArgMinMax.cc
index 3df7e7403..dda098698 100644
--- a/tests/nnfw_api/src/one_op_tests/ArgMinMax.cc
+++ b/tests/nnfw_api/src/one_op_tests/ArgMinMax.cc
@@ -36,28 +36,6 @@ class ArgMinMaxVariation : public GenModelTest,
// Reduce axis: 1
// Output shape: {1, 2, 1}
// Output type: Int32
-TEST_P(ArgMinMaxVariation, Test)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_INT32;
- std::vector<int32_t> axis_data{1};
- uint32_t axis_buf = cgen.addBuffer(axis_data);
- int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
- int out = cgen.addTensor({{1, 2, 1}, output_type});
- param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
- : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(param.tcd);
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
// Test with different input type and value
INSTANTIATE_TEST_CASE_P(
GenModelTest, ArgMinMaxVariation,
@@ -93,6 +71,28 @@ INSTANTIATE_TEST_CASE_P(
TestCaseData{}.addInput<int8_t>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}), false,
circle::TensorType::TensorType_INT8, 1.0, 1}));
+TEST_P(ArgMinMaxVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ const auto output_type = circle::TensorType::TensorType_INT32;
+ std::vector<int32_t> axis_data{1};
+ uint32_t axis_buf = cgen.addBuffer(axis_data);
+ int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 2, 1}, output_type});
+ param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
+ : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
TEST_F(GenModelTest, OneOp_ArgMax_Int64_AxisToConst)
{
CircleGen cgen;
@@ -132,35 +132,41 @@ TEST_F(GenModelTest, OneOp_ArgMax_AxisToVar)
SUCCEED();
}
-TEST_F(GenModelTest, neg_OneOp_ArgMax_InvalidAxis0)
+TEST_P(ArgMinMaxVariation, neg_InvalidAxis0)
{
+ auto &param = GetParam();
+
CircleGen cgen;
const auto output_type = circle::TensorType::TensorType_INT32;
std::vector<int32_t> axis_data{4};
uint32_t axis_buf = cgen.addBuffer(axis_data);
int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
int out = cgen.addTensor({{1, 2, 1}, output_type});
- cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+ param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
+ : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
cgen.setInputsAndOutputs({in}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
_context->expectFailCompile();
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
}
-TEST_F(GenModelTest, neg_OneOp_ArgMax_InvalidAxis1)
+TEST_P(ArgMinMaxVariation, neg_InvalidAxis1)
{
+ auto &param = GetParam();
+
CircleGen cgen;
const auto output_type = circle::TensorType::TensorType_INT32;
std::vector<int32_t> axis_data{-3};
uint32_t axis_buf = cgen.addBuffer(axis_data);
int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
- int in = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int in = cgen.addTensor({{2, 2}, param.input_type}, param.scale, param.zero_point);
int out = cgen.addTensor({{2}, output_type});
- cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+ param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
+ : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
cgen.setInputsAndOutputs({in}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
@@ -188,16 +194,19 @@ TEST_F(GenModelTest, neg_OneOp_ArgMax_InType)
SUCCEED();
}
-TEST_F(GenModelTest, neg_OneOp_ArgMax_AxisType)
+TEST_P(ArgMinMaxVariation, neg_AxisType)
{
+ auto &param = GetParam();
+
CircleGen cgen;
- const auto output_type = circle::TensorType::TensorType_FLOAT32;
+ const auto output_type = circle::TensorType::TensorType_INT32;
std::vector<float> axis_data{4};
uint32_t axis_buf = cgen.addBuffer(axis_data);
int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, axis_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
int out = cgen.addTensor({{1, 2, 1}, output_type});
- cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+ param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
+ : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
cgen.setInputsAndOutputs({in}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
@@ -224,16 +233,20 @@ TEST_F(GenModelTest, neg_OneOp_ArgMax_OutType)
SUCCEED();
}
-TEST_F(GenModelTest, neg_OneOp_ArgMax_paramType)
+TEST_P(ArgMinMaxVariation, neg_paramType)
{
+ auto &param = GetParam();
+
CircleGen cgen;
const auto output_type = circle::TensorType::TensorType_INT32;
+ const auto output_param = circle::TensorType::TensorType_INT64;
std::vector<int32_t> axis_data{4};
uint32_t axis_buf = cgen.addBuffer(axis_data);
int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
int out = cgen.addTensor({{1, 2, 1}, output_type});
- cgen.addOperatorArgMax({{in, axis}, {out}}, circle::TensorType::TensorType_INT64);
+ param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_param)
+ : cgen.addOperatorArgMin({{in, axis}, {out}}, output_param);
cgen.setInputsAndOutputs({in}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
diff --git a/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc b/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc
index 2fb1d6898..15ddac210 100644
--- a/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc
+++ b/tests/nnfw_api/src/one_op_tests/AveragePool2D.cc
@@ -42,27 +42,6 @@ class AveragePool2DVariation : public GenModelTest,
{
};
-TEST_P(AveragePool2DVariation, Test)
-{
- auto &param = GetParam();
- CircleGen cgen;
-
- int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
- param.type.zero_point);
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
- param.param.stride_h, param.param.filter_w, param.param.filter_h,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(param.tcd);
- _context->setBackends(param.backend);
-
- SUCCEED();
-}
-
// Test with different input type and value
INSTANTIATE_TEST_CASE_P(
GenModelTest, AveragePool2DVariation,
@@ -108,6 +87,27 @@ INSTANTIATE_TEST_CASE_P(
{circle::TensorType::TensorType_INT8, 2.0, -1},
{"cpu"}}));
+TEST_P(AveragePool2DVariation, Test)
+{
+ auto &param = GetParam();
+ CircleGen cgen;
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
+ param.param.stride_h, param.param.filter_w, param.param.filter_h,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends(param.backend);
+
+ SUCCEED();
+}
+
TEST_F(GenModelTest, neg_OneOp_AvgPool2D_3DInput)
{
// 3D Tensors are not supported
@@ -142,13 +142,18 @@ TEST_F(GenModelTest, neg_OneOp_AvgPool2D_2DInput)
SUCCEED();
}
-TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidPaddingType)
+TEST_P(AveragePool2DVariation, neg_InvalidPaddingType)
{
+ auto &param = GetParam();
CircleGen cgen;
- int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAveragePool2D({{in}, {out}}, static_cast<circle::Padding>(99), 2, 2, 2, 2,
- circle::ActivationFunctionType_NONE);
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, static_cast<circle::Padding>(99),
+ param.param.stride_w, param.param.stride_h, param.param.filter_w,
+ param.param.filter_h, circle::ActivationFunctionType_NONE);
cgen.setInputsAndOutputs({in}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
@@ -157,12 +162,17 @@ TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidPaddingType)
SUCCEED();
}
-TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidFilterSize_1)
+TEST_P(AveragePool2DVariation, neg_InvalidFilterSize_1)
{
+ auto &param = GetParam();
CircleGen cgen;
- int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, -1, 2,
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
+ param.param.stride_h, -1, param.param.filter_h,
circle::ActivationFunctionType_NONE);
cgen.setInputsAndOutputs({in}, {out});
@@ -172,12 +182,17 @@ TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidFilterSize_1)
SUCCEED();
}
-TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidFilterSize_2)
+TEST_P(AveragePool2DVariation, neg_InvalidFilterSize_2)
{
+ auto &param = GetParam();
CircleGen cgen;
- int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 0,
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
+ param.param.stride_h, param.param.filter_w, 0,
circle::ActivationFunctionType_NONE);
cgen.setInputsAndOutputs({in}, {out});
@@ -187,12 +202,17 @@ TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidFilterSize_2)
SUCCEED();
}
-TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidStrides_1)
+TEST_P(AveragePool2DVariation, neg_InvalidStrides_1)
{
+ auto &param = GetParam();
CircleGen cgen;
- int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 0, 2, 2, 2,
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 0, param.param.stride_h,
+ param.param.filter_w, param.param.filter_h,
circle::ActivationFunctionType_NONE);
cgen.setInputsAndOutputs({in}, {out});
@@ -202,12 +222,17 @@ TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidStrides_1)
SUCCEED();
}
-TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidStrides_2)
+TEST_P(AveragePool2DVariation, neg_InvalidStrides_2)
{
+ auto &param = GetParam();
CircleGen cgen;
- int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 1, -100, 2, 2,
+
+ int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+ param.type.zero_point);
+ cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w, -100,
+ param.param.filter_w, param.param.filter_h,
circle::ActivationFunctionType_NONE);
cgen.setInputsAndOutputs({in}, {out});
diff --git a/tests/nnfw_api/src/one_op_tests/Concat.cc b/tests/nnfw_api/src/one_op_tests/Concat.cc
index 6e2435965..f4397ba66 100644
--- a/tests/nnfw_api/src/one_op_tests/Concat.cc
+++ b/tests/nnfw_api/src/one_op_tests/Concat.cc
@@ -59,25 +59,6 @@ class ConcatVariation : public GenModelTest,
// Input shape: {2, 3} / {2, 3}
// Output shape: {4, 3}
-TEST_P(ConcatVariation, Test)
-{
- auto &param = GetParam();
-
- CircleGen cgen;
- int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
- int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
- int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
- cgen.addOperatorConcatenation({{input1, input2}, {output}}, 0,
- circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({input1, input2}, {output});
-
- _context = std::make_unique<GenModelTestContext>(cgen.finish());
- _context->addTestCase(param.tcd);
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
INSTANTIATE_TEST_CASE_P(
GenModelTest, ConcatVariation,
::testing::Values(
@@ -107,6 +88,25 @@ INSTANTIATE_TEST_CASE_P(
{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
circle::TensorType::TensorType_INT64}));
+TEST_P(ConcatVariation, Test)
+{
+ auto &param = GetParam();
+
+ CircleGen cgen;
+ int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
+ cgen.addOperatorConcatenation({{input1, input2}, {output}}, 0,
+ circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({input1, input2}, {output});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(param.tcd);
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
TEST_F(GenModelTest, OneOp_Concat_Subtensor_4D)
{
CircleGen cgen;
@@ -180,13 +180,14 @@ TEST_F(GenModelTest, OneOp_Concat_Subtensor_4D)
SUCCEED();
}
-TEST_F(GenModelTest, neg_OneOp_Concat_InvalidAxis)
+TEST_P(ConcatVariation, neg_InvalidAxis)
{
- CircleGen cgen;
+ auto &param = GetParam();
- int input1 = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
- int input2 = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
- int output = cgen.addTensor({{4, 3}, circle::TensorType::TensorType_FLOAT32});
+ CircleGen cgen;
+ int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
int axis = 2;
cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis,
@@ -200,13 +201,14 @@ TEST_F(GenModelTest, neg_OneOp_Concat_InvalidAxis)
SUCCEED();
}
-TEST_F(GenModelTest, neg_OneOp_Concat_InvalidRank)
+TEST_P(ConcatVariation, neg_InvalidRank)
{
- CircleGen cgen;
+ auto &param = GetParam();
- int input1 = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
- int input2 = cgen.addTensor({{1, 2, 3}, circle::TensorType::TensorType_FLOAT32});
- int output = cgen.addTensor({{1, 2, 3}, circle::TensorType::TensorType_FLOAT32});
+ CircleGen cgen;
+ int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int input2 = cgen.addTensor({{1, 2, 3}, param.type}, param.scale, param.zero_point);
+ int output = cgen.addTensor({{1, 4, 3}, param.type}, param.scale, param.zero_point);
int axis = 0;
cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis,
@@ -220,13 +222,14 @@ TEST_F(GenModelTest, neg_OneOp_Concat_InvalidRank)
SUCCEED();
}
-TEST_F(GenModelTest, neg_OneOp_Concat_InvalidDimension)
+TEST_P(ConcatVariation, neg_InvalidDimension)
{
- CircleGen cgen;
+ auto &param = GetParam();
- int input1 = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
- int input2 = cgen.addTensor({{3, 2}, circle::TensorType::TensorType_FLOAT32});
- int output = cgen.addTensor({{4, 3}, circle::TensorType::TensorType_FLOAT32});
+ CircleGen cgen;
+ int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+ int input2 = cgen.addTensor({{3, 2}, param.type}, param.scale, param.zero_point);
+ int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
int axis = 0;
cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis,
diff --git a/tests/nnfw_api/src/one_op_tests/DepthToSpace.cc b/tests/nnfw_api/src/one_op_tests/DepthToSpace.cc
index 9f563401f..a4fe88493 100644
--- a/tests/nnfw_api/src/one_op_tests/DepthToSpace.cc
+++ b/tests/nnfw_api/src/one_op_tests/DepthToSpace.cc
@@ -29,6 +29,9 @@ class DepthToSpaceVariation : public GenModelTest,
{
};
+// Input shape: {1, 1, 2, 4}
+// Block size: 2
+// Output shape: {1, 2, 4, 1}
INSTANTIATE_TEST_CASE_P(
GenModelTest, DepthToSpaceVariation,
::testing::Values(
@@ -52,9 +55,6 @@ INSTANTIATE_TEST_CASE_P(
uniformTCD<int8_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
circle::TensorType::TensorType_INT8, 1.0f, -2}));
-// Input shape: {1, 1, 2, 4}
-// Block size: 2
-// Output shape: {1, 2, 4, 1}
TEST_P(DepthToSpaceVariation, Test)
{
auto &param = GetParam();
@@ -72,12 +72,13 @@ TEST_P(DepthToSpaceVariation, Test)
SUCCEED();
}
-TEST_F(GenModelTest, neg_OneOp_DepthToSpace_Blocksize)
+TEST_P(DepthToSpaceVariation, neg_Blocksize)
{
+ auto &param = GetParam();
+
CircleGen cgen;
- circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
- int in = cgen.addTensor({{1, 1, 2, 4}, data_type});
- int out = cgen.addTensor({{1, 2, 4, 1}, data_type});
+ int in = cgen.addTensor({{1, 1, 2, 4}, param.type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({{1, 2, 4, 1}, param.type}, param.scale, param.zero_point);
cgen.addOperatorDepthToSpace({{in}, {out}}, -2);
cgen.setInputsAndOutputs({in}, {out});
diff --git a/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc b/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc
index 658c44cb9..a0bdbf9e6 100644
--- a/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc
+++ b/tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc
@@ -257,50 +257,6 @@ class DepthwiseConv2DQuantTest
using DepthwiseConv2DQuantTestParamU8 = DepthwiseConv2DQuantTestParam<uint8_t>;
using DepthwiseConv2DQuantTestU8 = DepthwiseConv2DQuantTest<uint8_t>;
-CircleBuffer genDepthwiseConv2DQuantU8Model(int stride, int input_depth, int depth_multiplier)
-{
- assert(1 <= stride && stride <= 2);
- assert(1 <= input_depth && input_depth <= 16);
- assert(1 <= depth_multiplier && depth_multiplier <= 32);
-
- const int output_depth = input_depth * depth_multiplier;
- assert(1 <= output_depth && output_depth <= 32);
-
- CircleGen cgen;
- uint32_t ker_buf = cgen.addBuffer(std::vector<uint8_t>{
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
- 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
- 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
- uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>(output_depth, 0));
- int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_UINT8}, 0.5, 0);
- int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_UINT8, ker_buf}, 0.5, 0);
- int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0);
- int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_UINT8}, 1, 0);
- cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride,
- stride, depth_multiplier, circle::ActivationFunctionType_NONE);
- cgen.setInputsAndOutputs({in}, {out});
- return cgen.finish();
-}
-
-TEST_P(DepthwiseConv2DQuantTestU8, Test)
-{
- // Same input is used for all tests but output differs
- static const std::vector<uint8_t> input64{
- 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2,
- 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2};
-
- auto &param = GetParam();
- _context = std::make_unique<GenModelTestContext>(
- genDepthwiseConv2DQuantU8Model(param.stride, param.input_depth, param.depth_multiplier));
- std::vector<uint8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4);
- _context->addTestCase(uniformTCD<uint8_t>({ref_input}, {param.ref_output}));
- _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
- SUCCEED();
-}
-
// Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU
// kernels.
INSTANTIATE_TEST_CASE_P(
@@ -337,10 +293,7 @@ INSTANTIATE_TEST_CASE_P(
DepthwiseConv2DQuantTestParamU8{
2, 16, 1, std::vector<uint8_t>{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}}));
-using DepthwiseConv2DQuantTestParamI8 = DepthwiseConv2DQuantTestParam<int8_t>;
-using DepthwiseConv2DQuantTestI8 = DepthwiseConv2DQuantTest<int8_t>;
-
-CircleBuffer genDepthwiseConv2DQuantI8Model(int stride, int input_depth, int depth_multiplier)
+CircleBuffer genDepthwiseConv2DQuantU8Model(int stride, int input_depth, int depth_multiplier)
{
assert(1 <= stride && stride <= 2);
assert(1 <= input_depth && input_depth <= 16);
@@ -350,40 +303,43 @@ CircleBuffer genDepthwiseConv2DQuantI8Model(int stride, int input_depth, int dep
assert(1 <= output_depth && output_depth <= 32);
CircleGen cgen;
- uint32_t ker_buf = cgen.addBuffer(std::vector<int8_t>{
+ uint32_t ker_buf = cgen.addBuffer(std::vector<uint8_t>{
0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>(output_depth, 0));
- int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_INT8}, 0.5, 0);
- int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_INT8, ker_buf}, 0.5, 0);
+ int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_UINT8}, 0.5, 0);
+ int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_UINT8, ker_buf}, 0.5, 0);
int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0);
- int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_INT8}, 1, 0);
+ int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_UINT8}, 1, 0);
cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride,
stride, depth_multiplier, circle::ActivationFunctionType_NONE);
cgen.setInputsAndOutputs({in}, {out});
return cgen.finish();
}
-TEST_P(DepthwiseConv2DQuantTestI8, Test)
+TEST_P(DepthwiseConv2DQuantTestU8, Test)
{
// Same input is used for all tests but output differs
- static const std::vector<int8_t> input64{
+ static const std::vector<uint8_t> input64{
0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2,
2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2};
auto &param = GetParam();
_context = std::make_unique<GenModelTestContext>(
- genDepthwiseConv2DQuantI8Model(param.stride, param.input_depth, param.depth_multiplier));
- std::vector<int8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4);
- _context->addTestCase(uniformTCD<int8_t>({ref_input}, {param.ref_output}));
+ genDepthwiseConv2DQuantU8Model(param.stride, param.input_depth, param.depth_multiplier));
+ std::vector<uint8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4);
+ _context->addTestCase(uniformTCD<uint8_t>({ref_input}, {param.ref_output}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
SUCCEED();
}
+using DepthwiseConv2DQuantTestParamI8 = DepthwiseConv2DQuantTestParam<int8_t>;
+using DepthwiseConv2DQuantTestI8 = DepthwiseConv2DQuantTest<int8_t>;
+
// Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU
// kernels.
INSTANTIATE_TEST_CASE_P(
@@ -420,6 +376,50 @@ INSTANTIATE_TEST_CASE_P(
DepthwiseConv2DQuantTestParamI8{
2, 16, 1, std::vector<int8_t>{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}}));
+CircleBuffer genDepthwiseConv2DQuantI8Model(int stride, int input_depth, int depth_multiplier)
+{
+ assert(1 <= stride && stride <= 2);
+ assert(1 <= input_depth && input_depth <= 16);
+ assert(1 <= depth_multiplier && depth_multiplier <= 32);
+
+ const int output_depth = input_depth * depth_multiplier;
+ assert(1 <= output_depth && output_depth <= 32);
+
+ CircleGen cgen;
+ uint32_t ker_buf = cgen.addBuffer(std::vector<int8_t>{
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
+ 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
+ 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
+ uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>(output_depth, 0));
+ int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_INT8}, 0.5, 0);
+ int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_INT8, ker_buf}, 0.5, 0);
+ int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0);
+ int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_INT8}, 1, 0);
+ cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride,
+ stride, depth_multiplier, circle::ActivationFunctionType_NONE);
+ cgen.setInputsAndOutputs({in}, {out});
+ return cgen.finish();
+}
+
+TEST_P(DepthwiseConv2DQuantTestI8, Test)
+{
+ // Same input is used for all tests but output differs
+ static const std::vector<int8_t> input64{
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2,
+ 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2};
+
+ auto &param = GetParam();
+ _context = std::make_unique<GenModelTestContext>(
+ genDepthwiseConv2DQuantI8Model(param.stride, param.input_depth, param.depth_multiplier));
+ std::vector<int8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4);
+ _context->addTestCase(uniformTCD<int8_t>({ref_input}, {param.ref_output}));
+ _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+ SUCCEED();
+}
+
TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_InvalidPaddingType)
{
_context = std::make_unique<GenModelTestContext>(genNegTestDepthwiseConv2DModel(
diff --git a/tests/nnfw_api/src/one_op_tests/DetectionPostProcess.cc b/tests/nnfw_api/src/one_op_tests/DetectionPostProcess.cc
new file mode 100644
index 000000000..188638bbb
--- /dev/null
+++ b/tests/nnfw_api/src/one_op_tests/DetectionPostProcess.cc
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_DetectionPostProcess_SingleBox)
+{
+ CircleGen cgen;
+
+ int boxes = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+ int scores = cgen.addTensor({{1, 1, 2}, circle::TensorType::TensorType_FLOAT32});
+ int anchors = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+
+ int box_coors = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+ int box_classes = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ int box_scores = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ int num_selected = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorDetectionPostProcess(
+ {{boxes, scores, anchors}, {box_coors, box_classes, box_scores, num_selected}}, 1, 10, 10, 5, 5,
+ 0.8, 0.5, 1, 1, 1);
+ cgen.setInputsAndOutputs({boxes, scores, anchors},
+ {box_coors, box_classes, box_scores, num_selected});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{0, 0, 0, 0}, {0, 0.9}, {0, 0, 1, 1}},
+ {{-0.5, -0.5, 0.5, 0.5}, {0}, {0.9}, {1}}));
+ _context->setBackends({"cpu"});
+
+ SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_DetectionPostProcess_SingleBox_MultiClasses)
+{
+ CircleGen cgen;
+
+ int boxes = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+ int scores = cgen.addTensor({{1, 1, 3}, circle::TensorType::TensorType_FLOAT32});
+ int anchors = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+
+ int box_coors = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+ int box_classes = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ int box_scores = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+ int num_selected = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+
+ cgen.addOperatorDetectionPostProcess(
+ {{boxes, scores, anchors}, {box_coors, box_classes, box_scores, num_selected}}, 2, 10, 10, 5, 5,
+ 0.8, 0.5, 1, 1, 1);
+ cgen.setInputsAndOutputs({boxes, scores, anchors},
+ {box_coors, box_classes, box_scores, num_selected});
+
+ _context = std::make_unique<GenModelTestContext>(cgen.finish());
+ _context->addTestCase(uniformTCD<float>({{0, 0, 0, 0}, {0, 0.7, 0.9}, {0, 0, 1, 1}},
+ {{-0.5, -0.5, 0.5, 0.5}, {1}, {0.9}, {1}}));
+ _context->setBackends({"cpu"});
+ _context->expectFailModelLoad();
+
+ SUCCEED();
+}
diff --git a/tests/nnfw_api/src/one_op_tests/Pad.cc b/tests/nnfw_api/src/one_op_tests/Pad.cc
index 42971da79..c376c1c02 100644
--- a/tests/nnfw_api/src/one_op_tests/Pad.cc
+++ b/tests/nnfw_api/src/one_op_tests/Pad.cc
@@ -31,6 +31,21 @@ class PadVariation : public GenModelTest, public ::testing::WithParamInterface<P
{
};
+// Test with different value type
+INSTANTIATE_TEST_CASE_P(
+ GenModelTest, PadVariation,
+ ::testing::Values(
+ // float value
+ PadParam{uniformTCD<float>({{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}})},
+ // uint8 value
+ PadParam{
+ uniformTCD<uint8_t>({{1, 2, 3, 4}}, {{8, 8, 8, 8, 8, 1, 2, 8, 8, 3, 4, 8, 8, 8, 8, 8}}),
+ circle::TensorType::TensorType_UINT8, 1.0, 8},
+ // int8 value
+ PadParam{uniformTCD<int8_t>({{-2, -1, 1, 2}},
+ {{-5, -5, -5, -5, -5, -2, -1, -5, -5, 1, 2, -5, -5, -5, -5, -5}}),
+ circle::TensorType::TensorType_INT8, 1.0, -5}));
+
TEST_P(PadVariation, Test)
{
auto &param = GetParam();
@@ -51,29 +66,16 @@ TEST_P(PadVariation, Test)
SUCCEED();
}
-// Test with different value type
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, PadVariation,
- ::testing::Values(
- // float value
- PadParam{uniformTCD<float>({{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}})},
- // uint8 value
- PadParam{
- uniformTCD<uint8_t>({{1, 2, 3, 4}}, {{8, 8, 8, 8, 8, 1, 2, 8, 8, 3, 4, 8, 8, 8, 8, 8}}),
- circle::TensorType::TensorType_UINT8, 1.0, 8},
- // int8 value
- PadParam{uniformTCD<int8_t>({{-2, -1, 1, 2}},
- {{-5, -5, -5, -5, -5, -2, -1, -5, -5, 1, 2, -5, -5, -5, -5, -5}}),
- circle::TensorType::TensorType_INT8, 1.0, -5}));
-
-TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadRank)
+TEST_P(PadVariation, neg_InvalidPadRank)
{
+ auto &param = GetParam();
+
CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
std::vector<int32_t> padding_data{1, 1, 1, 1};
uint32_t padding_buf = cgen.addBuffer(padding_data);
int padding = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, padding_buf});
- int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
cgen.addOperatorPad({{in, padding}, {out}});
cgen.setInputsAndOutputs({in}, {out});
@@ -85,14 +87,16 @@ TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadRank)
SUCCEED();
}
-TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadDim0)
+TEST_P(PadVariation, neg_InvalidPadDim0)
{
+ auto &param = GetParam();
+
CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
std::vector<int32_t> padding_data{1, 1, 1, 1};
uint32_t padding_buf = cgen.addBuffer(padding_data);
int padding = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32, padding_buf});
- int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
cgen.addOperatorPad({{in, padding}, {out}});
cgen.setInputsAndOutputs({in}, {out});
@@ -104,14 +108,16 @@ TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadDim0)
SUCCEED();
}
-TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadDim1)
+TEST_P(PadVariation, neg_InvalidPadDim1)
{
+ auto &param = GetParam();
+
CircleGen cgen;
- int in = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
std::vector<int32_t> padding_data{1, 1, 1, 1};
uint32_t padding_buf = cgen.addBuffer(padding_data);
int padding = cgen.addTensor({{4, 1}, circle::TensorType::TensorType_INT32, padding_buf});
- int out = cgen.addTensor({{2, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+ int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
cgen.addOperatorPad({{in, padding}, {out}});
cgen.setInputsAndOutputs({in}, {out});
@@ -123,14 +129,20 @@ TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadDim1)
SUCCEED();
}
-TEST_F(GenModelTest, neg_OneOp_Pad_Type)
+TEST_P(PadVariation, neg_Type)
{
+ auto &param = GetParam();
+
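+ // Descriptive note (added for clarity): pick an output tensor type that differs from the
+ // parameterized input type so the type-mismatch case is exercised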
+ const circle::TensorType output_type = ((param.data_type == circle::TensorType::TensorType_UINT8)
+ ? circle::TensorType::TensorType_INT8
+ : circle::TensorType::TensorType_UINT8);
+
CircleGen cgen;
- int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+ int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
uint32_t padding_buf = cgen.addBuffer(padding_data);
int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
- int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 1);
+ int out = cgen.addTensor({{1, 4, 4, 1}, output_type}, 1.0, 0);
cgen.addOperatorPad({{in, padding}, {out}});
cgen.setInputsAndOutputs({in}, {out});
diff --git a/tests/nnfw_api/src/one_op_tests/Slice.cc b/tests/nnfw_api/src/one_op_tests/Slice.cc
index 960cd88e3..002fb0132 100644
--- a/tests/nnfw_api/src/one_op_tests/Slice.cc
+++ b/tests/nnfw_api/src/one_op_tests/Slice.cc
@@ -34,6 +34,32 @@ class SliceVariation : public GenModelTest,
{
};
+INSTANTIATE_TEST_CASE_P(
+ GenModelTest, SliceVariation,
+ ::testing::Values(
+ SliceVariationParam{
+ {2, 2, 3, 1},
+ {0, 1, 1, 0},
+ {1, 1, 2, 1},
+ uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}})},
+ SliceVariationParam{
+ {2, 2, 3, 1},
+ {0, 1, 1, 0},
+ {1, 1, 2, 1},
+ uniformTCD<uint8_t>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}),
+ circle::TensorType::TensorType_UINT8,
+ 1,
+ 0},
+ SliceVariationParam{
+ {2, 2, 3, 1},
+ {0, 1, 1, 0},
+ {1, 1, 2, 1},
+ uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}),
+ circle::TensorType::TensorType_FLOAT32,
+ 0,
+ 0,
+ circle::TensorType::TensorType_INT64}));
+
TEST_P(SliceVariation, Test)
{
auto &param = GetParam();
@@ -90,32 +116,6 @@ TEST_P(SliceVariation, Test)
SUCCEED();
}
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, SliceVariation,
- ::testing::Values(
- SliceVariationParam{
- {2, 2, 3, 1},
- {0, 1, 1, 0},
- {1, 1, 2, 1},
- uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}})},
- SliceVariationParam{
- {2, 2, 3, 1},
- {0, 1, 1, 0},
- {1, 1, 2, 1},
- uniformTCD<uint8_t>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}),
- circle::TensorType::TensorType_UINT8,
- 1,
- 0},
- SliceVariationParam{
- {2, 2, 3, 1},
- {0, 1, 1, 0},
- {1, 1, 2, 1},
- uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}),
- circle::TensorType::TensorType_FLOAT32,
- 0,
- 0,
- circle::TensorType::TensorType_INT64}));
-
TEST_F(GenModelTest, neg_OneOp_Slice_Type)
{
CircleGen cgen;
@@ -136,18 +136,48 @@ TEST_F(GenModelTest, neg_OneOp_Slice_Type)
SUCCEED();
}
-TEST_F(GenModelTest, neg_OneOp_Slice_DiffType)
+TEST_P(SliceVariation, neg_DiffType)
{
+ auto &param = GetParam();
+
CircleGen cgen;
- int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
- std::vector<int32_t> begins_data = {0, 0, 1, 0};
- uint32_t begins_buf = cgen.addBuffer(begins_data);
- int begins = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, begins_buf});
- std::vector<int64_t> sizes_data = {1, 2, 1, 1};
- uint32_t sizes_buf = cgen.addBuffer(sizes_data);
- int sizes = cgen.addTensor({{4}, circle::TensorType::TensorType_INT64, sizes_buf});
- int out = cgen.addTensor({{1, 2, 1, 1}, circle::TensorType::TensorType_FLOAT32});
- cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+
+ int in = cgen.addTensor({param.input_shape, param.input_type}, param.scale, param.zero_point);
+ int out = cgen.addTensor({param.sizes, param.input_type}, param.scale, param.zero_point);
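+ // Descriptive note (added for clarity): build 'begins' and 'sizes' with mismatched index
+ // types (one INT32, the other INT64), depending on the parameterized begins_type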
+ if (param.begins_type == circle::TensorType::TensorType_INT32)
+ {
+ uint32_t begins_buf = cgen.addBuffer(param.begins);
+ std::vector<int64_t> sizes_64(param.sizes.size());
+ for (int i = 0; i < param.begins.size(); i++)
+ {
+ sizes_64[i] = param.sizes[i];
+ }
+
+ int rank = param.begins.size();
+ int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
+
+ uint32_t sizes_buf = cgen.addBuffer(sizes_64);
+ int sizes = cgen.addTensor({{rank}, circle::TensorType::TensorType_INT64, sizes_buf});
+
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ }
+ else if (param.begins_type == circle::TensorType::TensorType_INT64)
+ {
+ std::vector<int64_t> begins_64(param.begins.size());
+ for (int i = 0; i < param.begins.size(); i++)
+ {
+ begins_64[i] = param.begins[i];
+ }
+
+ uint32_t begins_buf = cgen.addBuffer(begins_64);
+ int rank = param.begins.size();
+ int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
+
+ uint32_t sizes_buf = cgen.addBuffer(param.sizes);
+ int sizes = cgen.addTensor({{rank}, circle::TensorType::TensorType_INT32, sizes_buf});
+
+ cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+ }
cgen.setInputsAndOutputs({in}, {out});
_context = std::make_unique<GenModelTestContext>(cgen.finish());
diff --git a/tests/nnfw_api/src/one_op_tests/Softmax.cc b/tests/nnfw_api/src/one_op_tests/Softmax.cc
index 95debec33..aba4e89a0 100644
--- a/tests/nnfw_api/src/one_op_tests/Softmax.cc
+++ b/tests/nnfw_api/src/one_op_tests/Softmax.cc
@@ -30,6 +30,23 @@ class SoftmaxVariation : public GenModelTest, public ::testing::WithParamInterfa
{
};
+// Test with different value type
+INSTANTIATE_TEST_CASE_P(
+ GenModelTest, SoftmaxVariation,
+ ::testing::Values(
+ // float value
+ SoftmaxParam{
+ uniformTCD<float>({{0, -6, 2, 4, 3, -2, 10, 1}},
+ {{.23463, .12877, .28658, .35003, .22528, .13664, .45365, .18443}})},
+ // uint8 value
+ SoftmaxParam{
+ uniformTCD<uint8_t>({{10, 4, 12, 14, 13, 8, 20, 11}}, {{60, 33, 73, 90, 58, 35, 116, 47}}),
+ circle::TensorType::TensorType_UINT8, 1.0, 10},
+ // int8 value
+ SoftmaxParam{
+ uniformTCD<int8_t>({{0, -6, 2, 4, 3, -2, 10, 1}}, {{-68, -95, -55, -38, -70, -93, -12, -81}}),
+ circle::TensorType::TensorType_INT8, 1.0, 0}));
+
TEST_P(SoftmaxVariation, Test)
{
auto &param = GetParam();
@@ -95,28 +112,14 @@ TEST_F(GenModelTest, OneOp_Softmax)
SUCCEED();
}
-// Test with different value type
-INSTANTIATE_TEST_CASE_P(
- GenModelTest, SoftmaxVariation,
- ::testing::Values(
- // float value
- SoftmaxParam{
- uniformTCD<float>({{0, -6, 2, 4, 3, -2, 10, 1}},
- {{.23463, .12877, .28658, .35003, .22528, .13664, .45365, .18443}})},
- // uint8 value
- SoftmaxParam{
- uniformTCD<uint8_t>({{10, 4, 12, 14, 13, 8, 20, 11}}, {{60, 33, 73, 90, 58, 35, 116, 47}}),
- circle::TensorType::TensorType_UINT8, 1.0, 10},
- // int8 value
- SoftmaxParam{
- uniformTCD<int8_t>({{0, -6, 2, 4, 3, -2, 10, 1}}, {{-68, -95, -55, -38, -70, -93, -12, -81}}),
- circle::TensorType::TensorType_INT8, 1.0, 0}));
-
-TEST_F(GenModelTest, neg_OneOp_Softmax_Type)
+TEST_P(SoftmaxVariation, neg_Type)
{
+ auto &param = GetParam();
+
CircleGen cgen;
- int input = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_FLOAT32});
- int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_INT8}, 1.0, 0);
+ int input =
+ cgen.addTensor({{1, 2, 1, 4}, param.data_type}, param.input_scale, param.input_zero_point);
+ int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_BOOL});
cgen.addOperatorSoftmax({{input}, {out}}, 0.1);
cgen.setInputsAndOutputs({input}, {out});
diff --git a/tools/release_tool/onert_version.sh b/tools/release_tool/onert_version.sh
index 5c875e38b..374a58acf 100755
--- a/tools/release_tool/onert_version.sh
+++ b/tools/release_tool/onert_version.sh
@@ -27,7 +27,7 @@ show_version() {
current_version=${version_line#"Version:"}
if [ $nightly -eq 0 ]; then
- echo $current_version~$(date "+%y%m%d%H")
+ echo $current_version~$(date -u "+%y%m%d%H")
else
echo $current_version
fi